Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 80 additions & 57 deletions ddprof-lib/src/main/cpp/j9/j9WallClock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "j9/j9Support.h"
#include "profiler.h"
#include "threadState.h"
#include <cxxabi.h>
#include <stdlib.h>

volatile bool J9WallClock::_enabled = false;
Expand Down Expand Up @@ -57,68 +58,90 @@ void J9WallClock::stop() {
}

void J9WallClock::timerLoop() {
JNIEnv *jni = VM::attachThread("java-profiler Sampler");
jvmtiEnv *jvmti = VM::jvmti();

int max_frames = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES;
ASGCT_CallFrame *frames =
(ASGCT_CallFrame *)malloc(max_frames * sizeof(ASGCT_CallFrame));

while (_running) {
if (!_enabled) {
OS::sleep(_interval);
continue;
}

jni->PushLocalFrame(64);

jvmtiStackInfoExtended *stack_infos;
jint thread_count;
if (J9Support::GetAllStackTracesExtended(
_max_stack_depth, (void **)&stack_infos, &thread_count) == 0) {
for (int i = 0; i < thread_count; i++) {
jvmtiStackInfoExtended *si = &stack_infos[i];
if (si->frame_count <= 0) {
// no frames recorded
continue;
}
OSThreadState ts = (si->state & JVMTI_THREAD_STATE_RUNNABLE)
? OSThreadState::RUNNABLE
: OSThreadState::SLEEPING;
if (!_sample_idle_threads && ts != OSThreadState::RUNNABLE) {
// in execution profiler mode the non-running threads are skipped
continue;
}
for (int j = 0; j < si->frame_count; j++) {
jvmtiFrameInfoExtended *fi = &si->frame_buffer[j];
frames[j].method_id = fi->method;
frames[j].bci = FrameType::encode(sanitizeJ9FrameType(fi->type), fi->location);
}
// IBM J9 may cancel this thread via abi::__forced_unwind during JVM shutdown.
// Catch it to ensure free(frames) and VM::detachThread() always run — including
// if the unwind fires during setup (VM::attachThread or malloc) before the
// sampling loop starts — then re-throw so the thread exits properly through
// the normal unwind path. jni and frames are declared before the try and
// initialized to null so the catch handler's cleanup is safe in either case:
// free(nullptr) is a no-op, and detachThread() is only called when attach
// succeeded.
//
// PushLocalFrame / PopLocalFrame balance: if the forced unwind fires between
// PushLocalFrame and PopLocalFrame, the outstanding JNI local frame is released
// by DetachCurrentThread (called via VM::detachThread()), which is specified to
// destroy the current stack frame's local references. The common cancellation
// point is OS::sleep() which runs after PopLocalFrame, so no imbalance occurs
// in the typical case.
JNIEnv *jni = nullptr;
ASGCT_CallFrame *frames = nullptr;
try {
jni = VM::attachThread("java-profiler Sampler");
jvmtiEnv *jvmti = VM::jvmti();

int max_frames = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES;
frames = (ASGCT_CallFrame *)malloc(max_frames * sizeof(ASGCT_CallFrame));

while (_running) {
if (!_enabled) {
OS::sleep(_interval);
continue;
}

int tid = J9Support::GetOSThreadID(si->thread);
if (tid == -1) {
// clearly an invalid TID; skip the thread
continue;
}
ExecutionEvent event;
event._thread_state = ts;
if (ts == OSThreadState::RUNNABLE) {
Profiler::instance()->recordExternalSample(
_interval, tid, si->frame_count, frames, /*truncated=*/false,
BCI_CPU, &event);
}
if (_sample_idle_threads) {
Profiler::instance()->recordExternalSample(
_interval, tid, si->frame_count, frames, /*truncated=*/false,
BCI_WALL, &event);
jni->PushLocalFrame(64);

jvmtiStackInfoExtended *stack_infos;
jint thread_count;
if (J9Support::GetAllStackTracesExtended(
_max_stack_depth, (void **)&stack_infos, &thread_count) == 0) {
for (int i = 0; i < thread_count; i++) {
jvmtiStackInfoExtended *si = &stack_infos[i];
if (si->frame_count <= 0) {
// no frames recorded
continue;
}
OSThreadState ts = (si->state & JVMTI_THREAD_STATE_RUNNABLE)
? OSThreadState::RUNNABLE
: OSThreadState::SLEEPING;
if (!_sample_idle_threads && ts != OSThreadState::RUNNABLE) {
// in execution profiler mode the non-running threads are skipped
continue;
}
for (int j = 0; j < si->frame_count; j++) {
jvmtiFrameInfoExtended *fi = &si->frame_buffer[j];
frames[j].method_id = fi->method;
frames[j].bci = FrameType::encode(sanitizeJ9FrameType(fi->type), fi->location);
}

int tid = J9Support::GetOSThreadID(si->thread);
if (tid == -1) {
// clearly an invalid TID; skip the thread
continue;
}
ExecutionEvent event;
event._thread_state = ts;
if (ts == OSThreadState::RUNNABLE) {
Profiler::instance()->recordExternalSample(
_interval, tid, si->frame_count, frames, /*truncated=*/false,
BCI_CPU, &event);
}
if (_sample_idle_threads) {
Profiler::instance()->recordExternalSample(
_interval, tid, si->frame_count, frames, /*truncated=*/false,
BCI_WALL, &event);
}
}
jvmti->Deallocate((unsigned char *)stack_infos);
}
jvmti->Deallocate((unsigned char *)stack_infos);
}

jni->PopLocalFrame(NULL);
jni->PopLocalFrame(NULL);

OS::sleep(_interval);
OS::sleep(_interval);
}
} catch (abi::__forced_unwind&) {
free(frames);
VM::detachThread(); // DetachCurrentThread releases any outstanding JNI local frames
throw;
}

free(frames);
Expand Down
71 changes: 42 additions & 29 deletions ddprof-lib/src/main/cpp/libraryPatcher_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "guards.h"

#include <cassert>
#include <cxxabi.h>
#include <dlfcn.h>
#include <limits.h>
#include <string.h>
Expand Down Expand Up @@ -64,21 +65,16 @@ static void delete_routine_info(RoutineInfo* thr) {
delete thr;
}

// Initialize the current thread's TLS with profiling signals blocked.
// Kept noinline for the same stack-protector reason as delete_routine_info.
__attribute__((noinline))
static void init_thread_tls() {
SignalBlocker blocker;
ProfiledThread::initCurrentThread();
}

// Arm the CPU timer with profiling signals blocked and open the init window
// (PROF-13072). Kept noinline for the same stack-protector reason as
// Initialize the current thread's TLS, open the init window (PROF-13072), and
// register the thread with the profiler — all under a single SignalBlocker so
// profiling signals cannot fire in the gap between initCurrentThread() and
// startInitWindow(). Kept noinline for the same stack-protector reason as
// delete_routine_info: SignalBlocker's sigset_t must not appear in
// start_routine_wrapper_spec's own stack frame on musl/aarch64.
__attribute__((noinline))
static void start_window_and_register() {
static void init_tls_and_register() {
SignalBlocker blocker;
ProfiledThread::initCurrentThread();
if (ProfiledThread *pt = ProfiledThread::currentSignalSafe()) {
pt->startInitWindow();
}
Expand All @@ -98,12 +94,25 @@ static void* start_routine_wrapper_spec(void* args) {
func_start_routine routine = thr->routine();
void* params = thr->args();
delete_routine_info(thr);
init_thread_tls();
start_window_and_register();
void* result = routine(params);
Profiler::unregisterThread(ProfiledThread::currentTid());
init_tls_and_register();
// Capture tid from TLS while it is guaranteed non-null (set by init_tls_and_register above).
// Using a cached tid avoids the lazy-allocating ProfiledThread::current() path inside
// the catch block, which may call 'new' at an unsafe point during forced unwind.
int tid = ProfiledThread::currentTid();
// IBM J9 (and glibc pthread_cancel) use abi::__forced_unwind for thread teardown.
// Catch it explicitly so cleanup runs even during forced unwind, then re-throw
// to allow the thread to exit properly. A plain catch(...) without re-throw
// would swallow the forced unwind and prevent the thread from actually exiting.
try {
routine(params);
} catch (abi::__forced_unwind&) {
Profiler::unregisterThread(tid);
ProfiledThread::release();
throw;
}
Profiler::unregisterThread(tid);
ProfiledThread::release();
return result;
return nullptr;
}

static int pthread_create_hook_spec(pthread_t* thread,
Expand All @@ -125,12 +134,6 @@ static int pthread_create_hook_spec(pthread_t* thread,

#endif // __aarch64__

static void thread_cleanup(void* arg) {
int tid = *static_cast<int*>(arg);
Profiler::unregisterThread(tid);
ProfiledThread::release();
}

// Wrapper around the real start routine.
// See comments for start_routine_wrapper_spec() for details
__attribute__((visibility("hidden")))
Expand Down Expand Up @@ -160,13 +163,23 @@ static void* start_routine_wrapper(void* args) {
ProfiledThread::currentSignalSafe()->startInitWindow();
Profiler::registerThread(tid);
}
void* result = nullptr;
// Handle pthread_exit() bypass - the thread calls pthread_exit()
// instead of normal termination
pthread_cleanup_push(thread_cleanup, &tid);
result = routine(params);
pthread_cleanup_pop(1);
return result;
// IBM J9 (and glibc pthread_cancel) use abi::__forced_unwind for thread
// teardown. pthread_cleanup_push/pop creates a __pthread_cleanup_class
// with an implicitly-noexcept destructor; when J9's forced-unwind
// propagates through it, the C++ runtime calls std::terminate() → abort().
// Replacing with an explicit catch ensures cleanup runs on forced unwind
// without triggering terminate, and the re-throw lets the thread exit cleanly.
// pthread_exit() is also covered: on glibc it raises its own __forced_unwind.
try {
routine(params);
} catch (abi::__forced_unwind&) {
Profiler::unregisterThread(tid);
ProfiledThread::release();
throw;
}
Profiler::unregisterThread(tid);
ProfiledThread::release();
return nullptr;
}

static int pthread_create_hook(pthread_t* thread,
Expand Down
Loading
Loading