Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions ddprof-lib/src/main/cpp/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,22 @@ Error Arguments::parse(const char *args) {
}
}

CASE("nosanity")
if (value != NULL) {
switch (value[0]) {
case 'n': // no
case 'f': // false
case '0': // 0
_skip_sanity_checks = false;
break;
default:
_skip_sanity_checks = true;
}
} else {
// bare 'nosanity' with no value means skip checks
_skip_sanity_checks = true;
}

DEFAULT()
if (_unknown_arg == NULL)
_unknown_arg = arg;
Expand Down
4 changes: 3 additions & 1 deletion ddprof-lib/src/main/cpp/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ class Arguments {
bool _lightweight;
bool _enable_method_cleanup;
bool _remote_symbolication; // Enable remote symbolication for native frames
bool _skip_sanity_checks;

Arguments(bool persistent = false)
: _buf(NULL),
Expand Down Expand Up @@ -223,7 +224,8 @@ class Arguments {
_context_attributes({}),
_lightweight(false),
_enable_method_cleanup(true),
_remote_symbolication(false) {}
_remote_symbolication(false),
_skip_sanity_checks(false) {}

~Arguments();

Expand Down
2 changes: 2 additions & 0 deletions ddprof-lib/src/main/cpp/os.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ class OS {

static bool getCpuDescription(char* buf, size_t size);
static int getCpuCount();
static int getCgroupCpuMillicores();
static long getContainerMemoryLimit();
static u64 getProcessCpuTime(u64* utime, u64* stime);
static u64 getTotalCpuTime(u64* utime, u64* stime);

Expand Down
84 changes: 84 additions & 0 deletions ddprof-lib/src/main/cpp/os_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,90 @@ int OS::getCpuCount() {
return sysconf(_SC_NPROCESSORS_ONLN);
}

// Reports the cgroup CPU bandwidth limit in millicores (quota * 1000 / period,
// so a quota equal to the period yields 1000).
//
// The cgroup v2 file cpu.max is consulted first; when it is absent, empty or
// cannot be parsed, the cgroup v1 cpu.cfs_quota_us / cpu.cfs_period_us pair is
// used instead. Returns -1 when no quota is configured or nothing could be read.
int OS::getCgroupCpuMillicores() {
  // Copies at most cap-1 bytes of the file at `path` into `dst`. The caller
  // hands in a zero-filled buffer, so the text stays NUL-terminated.
  // Yields true only when at least one byte was read.
  auto slurp = [](const char* path, char* dst, size_t cap) -> bool {
    int handle = open(path, O_RDONLY);
    if (handle == -1) {
      return false;
    }
    ssize_t got = read(handle, dst, cap - 1);
    close(handle);
    return got > 0;
  };

  // cgroup v2: one line holding either "max" or "<quota> <period>".
  char v2_text[64] = {0};
  if (slurp("/sys/fs/cgroup/cpu.max", v2_text, sizeof(v2_text))) {
    if (strncmp(v2_text, "max", 3) == 0) {
      return -1; // unconstrained
    }
    long v2_quota, v2_period;
    if (sscanf(v2_text, "%ld %ld", &v2_quota, &v2_period) == 2 && v2_period > 0) {
      return static_cast<int>(v2_quota * 1000 / v2_period);
    }
    // Anything else is treated as unreadable; the v1 files are tried next.
  }

  // cgroup v1: the quota lives in its own file; a non-positive or missing
  // value means there is no limit to report.
  char quota_text[32] = {0};
  long quota_us = -1;
  if (slurp("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", quota_text, sizeof(quota_text))) {
    quota_us = atol(quota_text);
  }
  if (quota_us <= 0) {
    return -1; // unconstrained or unavailable
  }

  // The period falls back to 100ms when its file is missing or not positive.
  long period_us = 100000;
  char period_text[32] = {0};
  if (slurp("/sys/fs/cgroup/cpu/cpu.cfs_period_us", period_text, sizeof(period_text))) {
    long parsed = atol(period_text);
    if (parsed > 0) {
      period_us = parsed;
    }
  }

  return static_cast<int>(quota_us * 1000 / period_us);
}

// Reports the cgroup memory limit in bytes.
//
// The cgroup v2 file memory.max is consulted first; when it is absent or does
// not hold a positive number, the cgroup v1 memory.limit_in_bytes file is used.
// Returns -1 when no limit is configured or nothing could be read.
long OS::getContainerMemoryLimit() {
  // Copies at most cap-1 bytes of the file at `path` into `dst`. The caller
  // hands in a zero-filled buffer, so the text stays NUL-terminated.
  // Yields true only when at least one byte was read.
  auto slurp = [](const char* path, char* dst, size_t cap) -> bool {
    int handle = open(path, O_RDONLY);
    if (handle == -1) {
      return false;
    }
    ssize_t got = read(handle, dst, cap - 1);
    close(handle);
    return got > 0;
  };

  // cgroup v2: either the literal "max" or a byte count.
  char v2_text[32] = {0};
  if (slurp("/sys/fs/cgroup/memory.max", v2_text, sizeof(v2_text))) {
    if (strncmp(v2_text, "max", 3) == 0) {
      return -1; // unconstrained
    }
    long v2_limit = atol(v2_text);
    if (v2_limit > 0) {
      return v2_limit;
    }
    // Non-positive parse result: try the v1 file below.
  }

  // cgroup v1
  char v1_text[32] = {0};
  if (!slurp("/sys/fs/cgroup/memory/memory.limit_in_bytes", v1_text, sizeof(v1_text))) {
    return -1;
  }

  // 9223372036854771712 (LLONG_MAX rounded) is what v1 reports for
  // "unconstrained"; values at or above it are not a real limit.
  const long NO_LIMIT_FLOOR = 0x7ffffffffffff000L;
  long v1_limit = atol(v1_text);
  return (v1_limit > 0 && v1_limit < NO_LIMIT_FLOOR) ? v1_limit : -1;
}

u64 OS::getProcessCpuTime(u64* utime, u64* stime) {
struct tms buf;
clock_t real = times(&buf);
Expand Down
8 changes: 8 additions & 0 deletions ddprof-lib/src/main/cpp/os_macos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,14 @@ int OS::getCpuCount() {
return sysctlbyname("hw.logicalcpu", &cpu_count, &size, NULL, 0) == 0 ? cpu_count : 1;
}

// macOS build: no cgroup CPU quota is looked up here, so the "no limit"
// value (-1) is reported unconditionally.
int OS::getCgroupCpuMillicores() {
  const int NOT_APPLICABLE = -1;
  return NOT_APPLICABLE;
}

// macOS build: no container memory limit is looked up here, so the "no limit"
// value (-1) is reported unconditionally.
long OS::getContainerMemoryLimit() {
  const long NOT_APPLICABLE = -1;
  return NOT_APPLICABLE;
}

u64 OS::getProcessCpuTime(u64* utime, u64* stime) {
struct tms buf;
clock_t real = times(&buf);
Expand Down
13 changes: 13 additions & 0 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "utils.h"
#include "wallClock.h"
#include "frames.h"
#include "sanityCheck.h"

#include <algorithm>
#include <dlfcn.h>
Expand Down Expand Up @@ -1019,6 +1020,18 @@ Error Profiler::start(Arguments &args, bool reset) {
return Error("Profiler already started");
}

if (!args._skip_sanity_checks) {
static Error sanity_result = Error::OK;
static bool sanity_checked = false;
if (!sanity_checked) {
sanity_checked = true;
sanity_result = SanityChecker::runChecks(args);
}
if (sanity_result) {
return sanity_result;
}
}

Error error = checkJvmCapabilities();
if (error) {
return error;
Expand Down
111 changes: 111 additions & 0 deletions ddprof-lib/src/main/cpp/sanityCheck.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#include "sanityCheck.h"
#include "common.h"
#include "os.h"
#include "hotspot/vmStructs.h"

// Returns the value of a size-typed JVM flag, or default_val if not found.
static size_t getVMSizeFlag(const char* name, size_t default_val) {
VMFlag* f = VMFlag::find(name, {VMFlag::Type::Uintx, VMFlag::Type::Size_t, VMFlag::Type::Uint64_t});
if (f != NULL && f->addr() != NULL) {
return *static_cast<size_t*>(f->addr());
}
return default_val;
}

// Pre-flight resource check.
//
// CPU: the usable core count is the logical CPU count, capped by the whole-core
// part of the cgroup quota when one is set; the check fails when that is below 1.
// Memory: the estimated footprint (max heap + 30% GC overhead + metaspace +
// code cache + thread stacks + a fixed profiler allowance) must fit into the
// available memory (RAM minus an OS reserve, capped by the container limit).
// The memory check is skipped when the available figure is 0.
//
// Returns Error::OK when both checks pass. Otherwise each failure is logged
// and an Error carrying a "[sanity] key=value,..." summary is returned.
// The Arguments parameter is currently unused.
Error SanityChecker::runChecks(const Arguments& /*args*/) {
  // The returned Error refers to this storage, hence static. The original
  // author's note: this is safe because runChecks is called under _state_lock
  // and the result is cached as a static Error in profiler.cpp.
  static char summary[1024];

  // Fixed allowances and fallbacks used by the estimate.
  const u64 OS_RESERVE = 128ULL * 1024 * 1024;
  const u64 PROFILER_OVERHEAD = 64ULL * 1024 * 1024;
  const size_t DEFAULT_METASPACE = 256ULL * 1024 * 1024;
  const size_t DEFAULT_CODECACHE = 240ULL * 1024 * 1024;
  const size_t DEFAULT_STACK_SIZE = 512ULL * 1024;
  const int DEFAULT_THREAD_COUNT = 200;

  // Whole megabytes, in the type the %llu conversions expect.
  auto mb = [](u64 bytes) -> unsigned long long {
    return (unsigned long long)(bytes / (1024 * 1024));
  };

  // --- CPU side ---
  const int host_cpus = OS::getCpuCount();
  const int quota_mc = OS::getCgroupCpuMillicores();
  const long mem_limit = OS::getContainerMemoryLimit();
  const bool in_container = (quota_mc > 0) || (mem_limit > 0);

  int usable_cores = host_cpus;
  if (quota_mc > 0 && quota_mc / 1000 < host_cpus) {
    usable_cores = quota_mc / 1000;
  }

  // --- Available memory ---
  const u64 physical = OS::getRamSize();
  u64 available = 0;
  if (physical > OS_RESERVE) {
    available = physical - OS_RESERVE;
  }
  if (mem_limit > 0 && (u64)mem_limit < available) {
    available = (u64)mem_limit;
  }

  // --- Estimated JVM + profiler footprint ---
  const size_t heap_bytes = getVMSizeFlag("MaxHeapSize", 0);
  const size_t metaspace_bytes = getVMSizeFlag("MaxMetaspaceSize", DEFAULT_METASPACE);
  const size_t codecache_bytes = getVMSizeFlag("ReservedCodeCacheSize", DEFAULT_CODECACHE);
  // The flag value is multiplied by 1024 here, so the default is divided first.
  const size_t stack_bytes = getVMSizeFlag("ThreadStackSize", DEFAULT_STACK_SIZE / 1024) * 1024;

  ProcessInfo proc = {};
  int live_threads = DEFAULT_THREAD_COUNT;
  if (OS::getBasicProcessInfo(OS::processId(), &proc) && proc.threads > 0) {
    live_threads = proc.threads;
  }

  const u64 gc_bytes = (u64)heap_bytes * 30 / 100;
  const u64 stacks_bytes = (u64)live_threads * (u64)stack_bytes;
  const u64 required = (u64)heap_bytes + (u64)metaspace_bytes + (u64)codecache_bytes
                     + gc_bytes
                     + stacks_bytes
                     + PROFILER_OVERHEAD;

  // --- Verdicts ---
  const bool cpu_ok = usable_cores >= 1;
  const bool mem_ok = (available == 0) || (required <= available);
  if (cpu_ok && mem_ok) {
    return Error::OK;
  }

  if (!cpu_ok) {
    LOG_WARN("Sanity check failed: effective CPU count is %d (logical=%d, cgroup=%dmc).",
             usable_cores, host_cpus, quota_mc);
  }
  if (!mem_ok) {
    LOG_WARN("Sanity check failed: estimated memory requirement (%llu MB) exceeds available memory (%llu MB).",
             mb(required),
             mb(available));
  }

  // -1 stands in for the container limit when none is set.
  long long limit_mb = -1LL;
  if (mem_limit > 0) {
    limit_mb = (long long)(mem_limit / (1024 * 1024));
  }

  snprintf(summary, sizeof(summary),
           "[sanity] cpu=%s,memory=%s,"
           "logical_cores=%d,cgroup_millicores=%d,effective_cores=%d,"
           "ram_mb=%llu,container_limit_mb=%lld,upper_mb=%llu,lower_mb=%llu,"
           "heap_mb=%llu,metaspace_mb=%llu,codecache_mb=%llu,"
           "gc_overhead_mb=%llu,threads=%d,stack_kb=%llu,profiler_mb=%llu,"
           "containerized=%s",
           cpu_ok ? "ok" : "fail",
           mem_ok ? "ok" : "fail",
           host_cpus, quota_mc, usable_cores,
           mb(physical),
           limit_mb,
           mb(available),
           mb(required),
           mb(heap_bytes),
           mb(metaspace_bytes),
           mb(codecache_bytes),
           mb(gc_bytes),
           live_threads,
           (unsigned long long)(stack_bytes / 1024),
           mb(PROFILER_OVERHEAD),
           in_container ? "true" : "false");
  return Error(summary);
}
16 changes: 16 additions & 0 deletions ddprof-lib/src/main/cpp/sanityCheck.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef _SANITY_CHECK_H
#define _SANITY_CHECK_H

#include "arguments.h"

// Pre-flight check of the CPU and memory resources visible to the process.
class SanityChecker {
public:
// Runs the CPU and memory checks once per call.
// Returns Error::OK when every check passes; otherwise an Error whose message
// summarises the failed checks and the values they were computed from.
// `args` is accepted for interface purposes; the current implementation does
// not read it.
static Error runChecks(const Arguments& args);
};

#endif // _SANITY_CHECK_H
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

package com.datadoghq.profiler.sanity;

import com.datadoghq.profiler.JavaProfiler;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;

/**
 * Exercises the {@code nosanity} start option, which tells the native profiler to skip its
 * CPU/memory sanity checks.
 */
public class SanityCheckTest {

    private JavaProfiler profiler;

    /**
     * Every recording file created by {@link #startCommand(String)} during the current test.
     * A test may build more than one start command, so all of them are tracked for cleanup.
     */
    private final List<Path> jfrDumps = new ArrayList<>();

    /**
     * Builds a {@code start} command that records CPU samples into a fresh temp JFR file.
     *
     * @param extra additional comma-separated options to append; may be {@code null} or empty
     * @return the complete command string
     * @throws IOException if the recordings directory or the temp file cannot be created
     */
    private String startCommand(String extra) throws IOException {
        Path rootDir = Paths.get("/tmp/recordings");
        Files.createDirectories(rootDir);
        Path jfrDump = Files.createTempFile(rootDir, "sanity-check-test", ".jfr");
        jfrDumps.add(jfrDump);
        String base = "start,cpu=10ms,jfr,file=" + jfrDump.toAbsolutePath();
        return extra == null || extra.isEmpty() ? base : base + "," + extra;
    }

    /**
     * Stops the profiler if a test started it and removes every recording file the test created.
     * The files are deleted even when stopping fails.
     */
    @AfterEach
    void cleanup() throws Exception {
        try {
            if (profiler != null) {
                try {
                    profiler.stop();
                } catch (IllegalStateException ignored) {
                    // already stopped or never started
                }
            }
        } finally {
            for (Path dump : jfrDumps) {
                Files.deleteIfExists(dump);
            }
            jfrDumps.clear();
        }
    }

    /**
     * Bare {@code nosanity} (no value) bypasses sanity checks; profiler must start successfully
     * on any host.
     */
    @Test
    void nosanity_bypasses_checks() throws Exception {
        profiler = JavaProfiler.getInstance();
        assertDoesNotThrow(() -> profiler.execute(startCommand("nosanity")));
    }

    /**
     * The explicit {@code nosanity=true} form bypasses sanity checks just like the bare keyword.
     */
    @Test
    void nosanity_explicit_true_bypasses_checks() throws Exception {
        profiler = JavaProfiler.getInstance();
        assertDoesNotThrow(() -> profiler.execute(startCommand("nosanity=true")));
    }

    /**
     * The profiler can be started, stopped and started again with {@code nosanity} set.
     *
     * <p>Both starts pass {@code nosanity} so the test succeeds regardless of host resources.
     * Because {@code nosanity} skips the native sanity-check block entirely, this test does not
     * exercise the native run-once guard.
     * NOTE(review): covering the guard itself would need a start without {@code nosanity},
     * which can legitimately fail on an under-provisioned host.
     */
    @Test
    void sanity_checks_run_once() throws Exception {
        profiler = JavaProfiler.getInstance();
        // First start with nosanity to guarantee success regardless of host resources.
        profiler.execute(startCommand("nosanity"));
        profiler.stop();
        // Second start, again with nosanity, must succeed after the stop.
        assertDoesNotThrow(() -> profiler.execute(startCommand("nosanity")));
    }
}
Loading