From a510eb0651b8c25688cdb8f8ef0a38f9ca868a95 Mon Sep 17 00:00:00 2001 From: eigenvivek Date: Wed, 18 Mar 2026 17:28:15 -0400 Subject: [PATCH 1/5] Add adjoints for `texture_sample` ([GH-1301](https://github.com/NVIDIA/warp/issues/1301)). Signed-off-by: eigenvivek --- warp/_src/builtins.py | 32 +-- warp/_src/texture.py | 45 +++- warp/native/texture.h | 349 +++++++++++++++++++++++++++++- warp/tests/cuda/test_texture.py | 363 ++++++++++++++++++++++++++++++++ 4 files changed, 747 insertions(+), 42 deletions(-) diff --git a/warp/_src/builtins.py b/warp/_src/builtins.py index e068556a67..c1d783efa5 100644 --- a/warp/_src/builtins.py +++ b/warp/_src/builtins.py @@ -7809,7 +7809,7 @@ def texture_sample_1d_dispatch_func(input_types: Mapping[str, type], return_type The sampled value of the specified ``dtype``. Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""", - is_differentiable=False, + is_differentiable=True, ) @@ -7856,7 +7856,7 @@ def texture_sample_2d_dispatch_func(input_types: Mapping[str, type], return_type The sampled value of the specified ``dtype``. Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""", - is_differentiable=False, + is_differentiable=True, ) # texture_sample for 2D textures with separate u, v coordinates @@ -7886,7 +7886,7 @@ def texture_sample_2d_dispatch_func(input_types: Mapping[str, type], return_type The sampled value of the specified ``dtype``. Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""", - is_differentiable=False, + is_differentiable=True, ) @@ -7933,7 +7933,7 @@ def texture_sample_3d_dispatch_func(input_types: Mapping[str, type], return_type The sampled value of the specified ``dtype``. Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""", - is_differentiable=False, + is_differentiable=True, ) # texture_sample for 3D textures with separate u, v, w coordinates @@ -7965,7 +7965,7 @@ def texture_sample_3d_dispatch_func(input_types: Mapping[str, type], return_type The sampled value of the specified ``dtype``. Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""", - is_differentiable=False, + is_differentiable=True, ) @@ -9828,17 +9828,6 @@ def vector_assign_dispatch_func(input_types: Mapping[str, type], return_type: An group="Utility", ) -# Bool vector assign_inplace (bool is not part of Scalar) -add_builtin( - "assign_inplace", - input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any}, - value_type=None, - dispatch_func=vector_assign_dispatch_func, - hidden=True, - export=False, - group="Utility", -) - # implements quaternion[index] = value add_builtin( "assign_inplace", @@ -9877,17 +9866,6 @@ def vector_assign_copy_value_func(arg_types: Mapping[str, type], arg_values: Map group="Utility", ) -# Bool vector assign_copy (bool is not part of Scalar) -add_builtin( - "assign_copy", - input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any}, - value_func=vector_assign_copy_value_func, - dispatch_func=vector_assign_dispatch_func, - hidden=True, - export=False, - group="Utility", -) - # implements quaternion[index] = value, performs a copy internally if wp.config.enable_vector_component_overwrites is True add_builtin( "assign_copy", diff --git a/warp/_src/texture.py b/warp/_src/texture.py index bd77344c34..59b8e1a32e 100644 --- a/warp/_src/texture.py +++ b/warp/_src/texture.py @@ -80,12 +80,16 @@ class texture1d_t(ctypes.Structure): ("tex", ctypes.c_uint64), ("width", ctypes.c_int32), ("num_channels", ctypes.c_int32), + ("filter_mode", ctypes.c_int32), + ("use_normalized_coords", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, num_channels=0): + def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1): self.tex = tex self.width = width self.num_channels = num_channels + self.filter_mode = filter_mode + self.use_normalized_coords = use_normalized_coords class texture2d_t(ctypes.Structure): @@ -99,13 +103,17 @@ class texture2d_t(ctypes.Structure): ("width", ctypes.c_int32), ("height", ctypes.c_int32), ("num_channels", ctypes.c_int32), + ("filter_mode", ctypes.c_int32), + ("use_normalized_coords", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, height=0, num_channels=0): + def __init__(self, tex=0, width=0, height=0, num_channels=0, filter_mode=0, use_normalized_coords=1): self.tex = tex self.width = width self.height = height self.num_channels = num_channels + self.filter_mode = filter_mode + self.use_normalized_coords = use_normalized_coords class texture3d_t(ctypes.Structure): @@ -120,14 +128,18 @@ class texture3d_t(ctypes.Structure): ("height", ctypes.c_int32), ("depth", ctypes.c_int32), ("num_channels", ctypes.c_int32), + ("filter_mode", ctypes.c_int32), + ("use_normalized_coords", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0): + def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mode=0, use_normalized_coords=1): self.tex = tex self.width = width self.height = height self.depth = depth self.num_channels = num_channels + self.filter_mode = filter_mode + self.use_normalized_coords = use_normalized_coords class cuda_array_desc_t(ctypes.Structure): @@ -956,7 +968,13 @@ def __ctype__(self) -> texture1d_t: """Return the ctypes structure for passing to kernels.""" if self._tex_handle == 0: raise RuntimeError("Texture was created with data=None but never initialized.") - return texture1d_t(self._tex_handle, self._width, self._num_channels) + return texture1d_t( + self._tex_handle, + self._width, + self._num_channels, + int(self._filter_mode), + int(self._normalized_coords), + ) class Texture2D(Texture): @@ -1033,7 +1051,14 @@ def __ctype__(self) -> texture2d_t: """Return the ctypes structure for passing to kernels.""" if self._tex_handle == 0: raise RuntimeError("Texture was created with data=None but never initialized.") - return texture2d_t(self._tex_handle, self._width, self._height, self._num_channels) + return texture2d_t( + self._tex_handle, + self._width, + self._height, + self._num_channels, + int(self._filter_mode), + int(self._normalized_coords), + ) class Texture3D(Texture): @@ -1114,7 +1139,15 @@ def __ctype__(self) -> texture3d_t: """Return the ctypes structure for passing to kernels.""" if self._tex_handle == 0: raise RuntimeError("Texture was created with data=None but never initialized.") - return texture3d_t(self._tex_handle, self._width, self._height, self._depth, self._num_channels) + return texture3d_t( + self._tex_handle, + self._width, + self._height, + self._depth, + self._num_channels, + int(self._filter_mode), + int(self._normalized_coords), + ) class TextureResourceFlags(enum.IntEnum): diff --git a/warp/native/texture.h b/warp/native/texture.h index 7bd5c27f81..ac49cecc79 100644 --- a/warp/native/texture.h +++ b/warp/native/texture.h @@ -155,18 +155,26 @@ struct texture1d_t { uint64 tex; // CUtexObject handle (GPU) or Texture* (CPU) int32 width; int32 num_channels; + int32 filter_mode; + int32 use_normalized_coords; CUDA_CALLABLE inline texture1d_t() : tex(0) , width(0) , num_channels(0) + , filter_mode(0) + , use_normalized_coords(1) { } - CUDA_CALLABLE inline texture1d_t(uint64 tex, int32 width, int32 num_channels) + CUDA_CALLABLE inline texture1d_t( + uint64 tex, int32 width, int32 num_channels, int32 filter_mode, int32 use_normalized_coords + ) : tex(tex) , width(width) , num_channels(num_channels) + , filter_mode(filter_mode) + , use_normalized_coords(use_normalized_coords) { } }; @@ -176,20 +184,28 @@ struct texture2d_t { int32 width; int32 height; int32 num_channels; + int32 filter_mode; + int32 use_normalized_coords; CUDA_CALLABLE inline texture2d_t() : tex(0) , width(0) , height(0) , num_channels(0) + , filter_mode(0) + , use_normalized_coords(1) { } - CUDA_CALLABLE inline texture2d_t(uint64 tex, int32 width, int32 height, int32 num_channels) + CUDA_CALLABLE inline texture2d_t( + uint64 tex, int32 width, int32 height, int32 num_channels, int32 filter_mode, int32 use_normalized_coords + ) : tex(tex) , width(width) , height(height) , num_channels(num_channels) + , filter_mode(filter_mode) + , use_normalized_coords(use_normalized_coords) { } }; @@ -200,6 +216,8 @@ struct texture3d_t { int32 height; int32 depth; int32 num_channels; + int32 filter_mode; + int32 use_normalized_coords; CUDA_CALLABLE inline texture3d_t() : tex(0) @@ -207,15 +225,27 @@ struct texture3d_t { , height(0) , depth(0) , num_channels(0) + , filter_mode(0) + , use_normalized_coords(1) { } - CUDA_CALLABLE inline texture3d_t(uint64 tex, int32 width, int32 height, int32 depth, int32 num_channels) + CUDA_CALLABLE inline texture3d_t( + uint64 tex, + int32 width, + int32 height, + int32 depth, + int32 num_channels, + int32 filter_mode, + int32 use_normalized_coords + ) : tex(tex) , width(width) , height(height) , depth(depth) , num_channels(num_channels) + , filter_mode(filter_mode) + , use_normalized_coords(use_normalized_coords) { } }; @@ -658,6 +688,12 @@ template <> struct texture_sample_helper { } static CUDA_CALLABLE float zero() { return 0.0f; } + +#if defined(__CUDA_ARCH__) + static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c) { return tex1D(t, u); } + static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v, int c) { return tex2D(t, u, v); } + static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v, float w, int c) { return tex3D(t, u, v, w); } +#endif }; template <> struct texture_sample_helper { @@ -701,6 +737,24 @@ template <> struct texture_sample_helper { } static CUDA_CALLABLE vec2f zero() { return vec2f(0.0f, 0.0f); } + +#if defined(__CUDA_ARCH__) + static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c) + { + float2 v = tex1D(t, u); + return c == 0 ? v.x : v.y; + } + static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v_, int c) + { + float2 v = tex2D(t, u, v_); + return c == 0 ? v.x : v.y; + } + static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v_, float w, int c) + { + float2 v = tex3D(t, u, v_, w); + return c == 0 ? v.x : v.y; + } +#endif }; template <> struct texture_sample_helper { @@ -753,6 +807,24 @@ template <> struct texture_sample_helper { } static CUDA_CALLABLE vec4f zero() { return vec4f(0.0f, 0.0f, 0.0f, 0.0f); } + +#if defined(__CUDA_ARCH__) + static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c) + { + float4 v = tex1D(t, u); + return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w; + } + static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v_, int c) + { + float4 v = tex2D(t, u, v_); + return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w; + } + static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v_, float w, int c) + { + float4 v = tex3D(t, u, v_, w); + return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w; + } +#endif }; // 1D texture sampling with scalar coordinate @@ -785,19 +857,66 @@ template CUDA_CALLABLE T texture_sample(const texture3d_t& tex, flo return texture_sample_helper::sample_3d(tex, u, v, w); } -// Adjoint stubs for texture sampling (non-differentiable for now) +// Adjoints for texture sampling w.r.t. sampling coordinates. +// Gradients w.r.t. texture data are not supported; adj_tex is a no-op. +// On GPU, requires filter_mode and use_normalized_coords in the descriptor. +// Boundary behavior matches PyTorch grid_sample with padding_mode="border": +// gradient is zero when the sampling position straddles a volume boundary. template CUDA_CALLABLE void adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& adj_u, const T& adj_ret) { - // Texture sampling is not differentiable in this implementation + if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) + return; + + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; + +#if defined(__CUDA_ARCH__) + float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f; + int x0 = (int)floor(raw_tx); + int x1 = x0 + 1; + + if (x0 >= 0 && x1 < tex.width) { + float u0 = tex.use_normalized_coords ? ((float)x0 + 0.5f) / (float)tex.width : (float)x0 + 0.5f; + float u1 = tex.use_normalized_coords ? ((float)x1 + 0.5f) / (float)tex.width : (float)x1 + 0.5f; + float gtx = 0.0f; + for (int c = 0; c < tex.num_channels; c++) + gtx += (texture_sample_helper::fetch_1d(tex.tex, u1, c) + - texture_sample_helper::fetch_1d(tex.tex, u0, c)) + * ((const float*)&adj_ret)[c]; + adj_u += gtx_mult * gtx; + } +#else + if (tex.tex == 0) + return; + const Texture* cpu_tex = (const Texture*)tex.tex; + + float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width); + float raw_tx = coord_u * (float)cpu_tex->width - 0.5f; + float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u); + + int x0_raw = (int)floor(raw_tx); + int x1_raw = x0_raw + 1; + int x0 = (int)floor(tx); + int x1 = x0 + 1; + + if (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width)) { + int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u); + int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u); + float gtx = 0.0f; + for (int c = 0; c < cpu_tex->num_channels; c++) + gtx += (cpu_fetch_texel_1d(cpu_tex, x1w, c) - cpu_fetch_texel_1d(cpu_tex, x0w, c)) + * ((const float*)&adj_ret)[c]; + adj_u += gtx_mult * gtx; + } +#endif } template CUDA_CALLABLE void adj_texture_sample(const texture2d_t& tex, const vec2f& uv, texture2d_t& adj_tex, vec2f& adj_uv, const T& adj_ret) { - // Texture sampling is not differentiable in this implementation + adj_texture_sample(tex, uv[0], uv[1], adj_tex, adj_uv[0], adj_uv[1], adj_ret); } template @@ -805,14 +924,98 @@ CUDA_CALLABLE void adj_texture_sample( const texture2d_t& tex, float u, float v, texture2d_t& adj_tex, float& adj_u, float& adj_v, const T& adj_ret ) { - // Texture sampling is not differentiable in this implementation + if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) + return; + + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; + float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; + +#if defined(__CUDA_ARCH__) + float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f; + float raw_ty = tex.use_normalized_coords ? v * (float)tex.height - 0.5f : v - 0.5f; + int x0 = (int)floor(raw_tx); + int x1 = x0 + 1; + int y0 = (int)floor(raw_ty); + int y1 = y0 + 1; + float fx = raw_tx - (float)x0; + float fy = raw_ty - (float)y0; + + bool x_ok = (x0 >= 0 && x1 < tex.width); + bool y_ok = (y0 >= 0 && y1 < tex.height); + + auto fetch = [&](int x, int y, int c) -> float { + float uf = tex.use_normalized_coords ? ((float)x + 0.5f) / (float)tex.width : (float)x + 0.5f; + float vf = tex.use_normalized_coords ? ((float)y + 0.5f) / (float)tex.height : (float)y + 0.5f; + return texture_sample_helper::fetch_2d(tex.tex, uf, vf, c); + }; + + float gtx = 0.0f, gty = 0.0f; + for (int c = 0; c < tex.num_channels; c++) { + float gOut = ((const float*)&adj_ret)[c]; + float v00 = fetch(x0, y0, c); + float v10 = fetch(x1, y0, c); + float v01 = fetch(x0, y1, c); + float v11 = fetch(x1, y1, c); + if (x_ok) + gtx += ((v10 - v00) * (1.0f - fy) + (v11 - v01) * fy) * gOut; + if (y_ok) + gty += ((v01 - v00) * (1.0f - fx) + (v11 - v10) * fx) * gOut; + } + adj_u += gtx_mult * gtx; + adj_v += gty_mult * gty; +#else + if (tex.tex == 0) + return; + const Texture* cpu_tex = (const Texture*)tex.tex; + + float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width); + float coord_v = cpu_tex->use_normalized_coords ? v : (v / (float)cpu_tex->height); + float raw_tx = coord_u * (float)cpu_tex->width - 0.5f; + float raw_ty = coord_v * (float)cpu_tex->height - 0.5f; + float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u); + float ty = cpu_apply_address_mode_1d(coord_v, cpu_tex->height, cpu_tex->address_mode_v); + + int x0_raw = (int)floor(raw_tx); + int x1_raw = x0_raw + 1; + int y0_raw = (int)floor(raw_ty); + int y1_raw = y0_raw + 1; + int x0 = (int)floor(tx); + int x1 = x0 + 1; + int y0 = (int)floor(ty); + int y1 = y0 + 1; + float fx = tx - (float)x0; + float fy = ty - (float)y0; + + bool x_ok = (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width)); + bool y_ok = (cpu_in_bounds_1d(y0_raw, cpu_tex->height) && cpu_in_bounds_1d(y1_raw, cpu_tex->height)); + + int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u); + int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u); + int y0w = cpu_apply_address_mode_index(y0, cpu_tex->height, cpu_tex->address_mode_v); + int y1w = cpu_apply_address_mode_index(y1, cpu_tex->height, cpu_tex->address_mode_v); + + float gtx = 0.0f, gty = 0.0f; + for (int c = 0; c < cpu_tex->num_channels; c++) { + float gOut = ((const float*)&adj_ret)[c]; + float v00 = cpu_fetch_texel_2d(cpu_tex, x0w, y0w, c); + float v10 = cpu_fetch_texel_2d(cpu_tex, x1w, y0w, c); + float v01 = cpu_fetch_texel_2d(cpu_tex, x0w, y1w, c); + float v11 = cpu_fetch_texel_2d(cpu_tex, x1w, y1w, c); + if (x_ok) + gtx += ((v10 - v00) * (1.0f - fy) + (v11 - v01) * fy) * gOut; + if (y_ok) + gty += ((v01 - v00) * (1.0f - fx) + (v11 - v10) * fx) * gOut; + } + adj_u += gtx_mult * gtx; + adj_v += gty_mult * gty; +#endif } template CUDA_CALLABLE void adj_texture_sample(const texture3d_t& tex, const vec3f& uvw, texture3d_t& adj_tex, vec3f& adj_uvw, const T& adj_ret) { - // Texture sampling is not differentiable in this implementation + adj_texture_sample(tex, uvw[0], uvw[1], uvw[2], adj_tex, adj_uvw[0], adj_uvw[1], adj_uvw[2], adj_ret); } template @@ -828,7 +1031,135 @@ CUDA_CALLABLE void adj_texture_sample( const T& adj_ret ) { - // Texture sampling is not differentiable in this implementation + if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) + return; + + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; + float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; + float gtz_mult = tex.use_normalized_coords ? (float)tex.depth : 1.0f; + +#if defined(__CUDA_ARCH__) + float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f; + float raw_ty = tex.use_normalized_coords ? v * (float)tex.height - 0.5f : v - 0.5f; + float raw_tz = tex.use_normalized_coords ? w * (float)tex.depth - 0.5f : w - 0.5f; + int x0 = (int)floor(raw_tx); + int x1 = x0 + 1; + int y0 = (int)floor(raw_ty); + int y1 = y0 + 1; + int z0 = (int)floor(raw_tz); + int z1 = z0 + 1; + float fx = raw_tx - (float)x0; + float fy = raw_ty - (float)y0; + float fz = raw_tz - (float)z0; + + bool x_ok = (x0 >= 0 && x1 < tex.width); + bool y_ok = (y0 >= 0 && y1 < tex.height); + bool z_ok = (z0 >= 0 && z1 < tex.depth); + + auto fetch = [&](int x, int y, int z, int c) -> float { + float uf = tex.use_normalized_coords ? ((float)x + 0.5f) / (float)tex.width : (float)x + 0.5f; + float vf = tex.use_normalized_coords ? ((float)y + 0.5f) / (float)tex.height : (float)y + 0.5f; + float wf = tex.use_normalized_coords ? ((float)z + 0.5f) / (float)tex.depth : (float)z + 0.5f; + return texture_sample_helper::fetch_3d(tex.tex, uf, vf, wf, c); + }; + + float gtx = 0.0f, gty = 0.0f, gtz = 0.0f; + for (int c = 0; c < tex.num_channels; c++) { + float gOut = ((const float*)&adj_ret)[c]; + float v000 = fetch(x0, y0, z0, c); + float v100 = fetch(x1, y0, z0, c); + float v010 = fetch(x0, y1, z0, c); + float v110 = fetch(x1, y1, z0, c); + float v001 = fetch(x0, y0, z1, c); + float v101 = fetch(x1, y0, z1, c); + float v011 = fetch(x0, y1, z1, c); + float v111 = fetch(x1, y1, z1, c); + if (x_ok) + gtx += (((v100 - v000) * (1.0f - fy) + (v110 - v010) * fy) * (1.0f - fz) + + ((v101 - v001) * (1.0f - fy) + (v111 - v011) * fy) * fz) + * gOut; + if (y_ok) + gty += (((v010 - v000) * (1.0f - fx) + (v110 - v100) * fx) * (1.0f - fz) + + ((v011 - v001) * (1.0f - fx) + (v111 - v101) * fx) * fz) + * gOut; + if (z_ok) + gtz += (((v001 - v000) * (1.0f - fx) + (v101 - v100) * fx) * (1.0f - fy) + + ((v011 - v010) * (1.0f - fx) + (v111 - v110) * fx) * fy) + * gOut; + } + adj_u += gtx_mult * gtx; + adj_v += gty_mult * gty; + adj_w += gtz_mult * gtz; +#else + if (tex.tex == 0) + return; + const Texture* cpu_tex = (const Texture*)tex.tex; + + float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width); + float coord_v = cpu_tex->use_normalized_coords ? v : (v / (float)cpu_tex->height); + float coord_w = cpu_tex->use_normalized_coords ? w : (w / (float)cpu_tex->depth); + float raw_tx = coord_u * (float)cpu_tex->width - 0.5f; + float raw_ty = coord_v * (float)cpu_tex->height - 0.5f; + float raw_tz = coord_w * (float)cpu_tex->depth - 0.5f; + float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u); + float ty = cpu_apply_address_mode_1d(coord_v, cpu_tex->height, cpu_tex->address_mode_v); + float tz = cpu_apply_address_mode_1d(coord_w, cpu_tex->depth, cpu_tex->address_mode_w); + + int x0_raw = (int)floor(raw_tx); + int x1_raw = x0_raw + 1; + int y0_raw = (int)floor(raw_ty); + int y1_raw = y0_raw + 1; + int z0_raw = (int)floor(raw_tz); + int z1_raw = z0_raw + 1; + int x0 = (int)floor(tx); + int x1 = x0 + 1; + int y0 = (int)floor(ty); + int y1 = y0 + 1; + int z0 = (int)floor(tz); + int z1 = z0 + 1; + float fx = tx - (float)x0; + float fy = ty - (float)y0; + float fz = tz - (float)z0; + + bool x_ok = (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width)); + bool y_ok = (cpu_in_bounds_1d(y0_raw, cpu_tex->height) && cpu_in_bounds_1d(y1_raw, cpu_tex->height)); + bool z_ok = (cpu_in_bounds_1d(z0_raw, cpu_tex->depth) && cpu_in_bounds_1d(z1_raw, cpu_tex->depth)); + + int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u); + int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u); + int y0w = cpu_apply_address_mode_index(y0, cpu_tex->height, cpu_tex->address_mode_v); + int y1w = cpu_apply_address_mode_index(y1, cpu_tex->height, cpu_tex->address_mode_v); + int z0w = cpu_apply_address_mode_index(z0, cpu_tex->depth, cpu_tex->address_mode_w); + int z1w = cpu_apply_address_mode_index(z1, cpu_tex->depth, cpu_tex->address_mode_w); + + float gtx = 0.0f, gty = 0.0f, gtz = 0.0f; + for (int c = 0; c < cpu_tex->num_channels; c++) { + float gOut = ((const float*)&adj_ret)[c]; + float v000 = cpu_fetch_texel_3d(cpu_tex, x0w, y0w, z0w, c); + float v100 = cpu_fetch_texel_3d(cpu_tex, x1w, y0w, z0w, c); + float v010 = cpu_fetch_texel_3d(cpu_tex, x0w, y1w, z0w, c); + float v110 = cpu_fetch_texel_3d(cpu_tex, x1w, y1w, z0w, c); + float v001 = cpu_fetch_texel_3d(cpu_tex, x0w, y0w, z1w, c); + float v101 = cpu_fetch_texel_3d(cpu_tex, x1w, y0w, z1w, c); + float v011 = cpu_fetch_texel_3d(cpu_tex, x0w, y1w, z1w, c); + float v111 = cpu_fetch_texel_3d(cpu_tex, x1w, y1w, z1w, c); + if (x_ok) + gtx += (((v100 - v000) * (1.0f - fy) + (v110 - v010) * fy) * (1.0f - fz) + + ((v101 - v001) * (1.0f - fy) + (v111 - v011) * fy) * fz) + * gOut; + if (y_ok) + gty += (((v010 - v000) * (1.0f - fx) + (v110 - v100) * fx) * (1.0f - fz) + + ((v011 - v001) * (1.0f - fx) + (v111 - v101) * fx) * fz) + * gOut; + if (z_ok) + gtz += (((v001 - v000) * (1.0f - fx) + (v101 - v100) * fx) * (1.0f - fy) + + ((v011 - v010) * (1.0f - fx) + (v111 - v110) * fx) * fy) + * gOut; + } + adj_u += gtx_mult * gtx; + adj_v += gty_mult * gty; + adj_w += gtz_mult * gtz; +#endif } // Type aliases for code generation diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py index 2eb6ecedff..d686ae10cd 100644 --- a/warp/tests/cuda/test_texture.py +++ b/warp/tests/cuda/test_texture.py @@ -2600,6 +2600,330 @@ def test_texture3d_array(test, device): np.testing.assert_allclose(result, expected, rtol=1e-5, atol=1e-5) +# ============================================================================ +# Adjoint tests +# ============================================================================ + + +@wp.kernel +def sample_1d(tex: wp.Texture1D, pos: wp.array(dtype=float), out: wp.array(dtype=float)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=float) + + +@wp.kernel +def sample_2d(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=float)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=float) + + +@wp.kernel +def sample_3d(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=float)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=float) + + +def _grad_1d(data, u, device): + tex = wp.Texture1D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([u], dtype=float, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + +def _grad_2d(data, coord, device): + tex = wp.Texture2D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([wp.vec2f(*coord)], dtype=wp.vec2f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_2d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + +def _grad_3d(data, coord, device): + tex = wp.Texture3D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([wp.vec3f(*coord)], dtype=wp.vec3f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_3d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + +def test_texture1d_adj_boundary_zero(test, device): + """Gradient is zero when sampling position straddles the near boundary.""" + data = np.random.default_rng(0).standard_normal(16).astype(np.float32) + np.testing.assert_allclose(_grad_1d(data, 0.1, device), 0.0, atol=1e-6) + + +def test_texture1d_adj_far_boundary_zero(test, device): + """Gradient is zero when sampling position straddles the far boundary.""" + data = np.random.default_rng(1).standard_normal(16).astype(np.float32) + np.testing.assert_allclose(_grad_1d(data, 15.9, device), 0.0, atol=1e-6) + + +def test_texture1d_adj_closest_zero(test, device): + """Gradient is zero for CLOSEST filter mode.""" + data = np.random.default_rng(2).standard_normal(16).astype(np.float32) + tex = wp.Texture1D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.CLOSEST, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([7.3], dtype=float, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + np.testing.assert_allclose(pos.grad.numpy()[0], 0.0, atol=1e-6) + + +def test_texture1d_adj_linear_signal(test, device): + """Gradient of a linear signal is constant and analytically known.""" + W = 16 + # value = x / W, so d(value)/d(u) = 1/W + data = np.arange(W, dtype=np.float32) / W + g = _grad_1d(data, 7.3, device) + np.testing.assert_allclose(g, 1.0 / W, atol=1e-5) + + +def test_texture2d_adj_near_boundary_zero(test, device): + """2D gradient is zero when straddling the near boundary in both axes.""" + data = np.random.default_rng(3).standard_normal((8, 10)).astype(np.float32) + g = _grad_2d(data, (0.1, 0.1), device) + np.testing.assert_allclose(g, [0.0, 0.0], atol=1e-6) + + +def test_texture2d_adj_far_boundary_zero(test, device): + """2D gradient is zero when straddling the far boundary.""" + data = np.random.default_rng(4).standard_normal((8, 10)).astype(np.float32) + H, W = data.shape + g = _grad_2d(data, (W - 0.1, H - 0.1), device) + np.testing.assert_allclose(g, [0.0, 0.0], atol=1e-6) + + +def test_texture2d_adj_partial_boundary(test, device): + """2D gradient: x interior but y straddling boundary — only y grad is zero.""" + data = np.random.default_rng(5).standard_normal((8, 10)).astype(np.float32) + g = _grad_2d(data, (3.7, 0.1), device) + # x is interior so gradient should be nonzero; y straddles boundary so zero + test.assertNotEqual(g[0], 0.0) + np.testing.assert_allclose(g[1], 0.0, atol=1e-6) + + +def test_texture2d_adj_linear_x(test, device): + """2D gradient of signal linear in x: x-grad is 1/W, y-grad is zero.""" + H, W = 6, 10 + data = np.zeros((H, W), dtype=np.float32) + for x in range(W): + data[:, x] = x / W + g = _grad_2d(data, (4.5, 3.0), device) + np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + + +def test_texture2d_adj_linear_y(test, device): + """2D gradient of signal linear in y: y-grad is 1/H, x-grad is zero.""" + H, W = 6, 10 + data = np.zeros((H, W), dtype=np.float32) + for y in range(H): + data[y, :] = y / H + g = _grad_2d(data, (4.5, 3.0), device) + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 1.0 / H, atol=1e-5) + + +def test_texture3d_adj_near_boundary_zero(test, device): + """3D gradient is zero when straddling the near boundary in all axes.""" + data = np.random.default_rng(6).standard_normal((8, 6, 10)).astype(np.float32) + g = _grad_3d(data, (0.1, 0.1, 0.1), device) + np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6) + + +def test_texture3d_adj_far_boundary_zero(test, device): + """3D gradient is zero when straddling the far boundary.""" + data = np.random.default_rng(7).standard_normal((8, 6, 10)).astype(np.float32) + D, H, W = data.shape + g = _grad_3d(data, (W - 0.1, H - 0.1, D - 0.1), device) + np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6) + + +def test_texture3d_adj_partial_boundary(test, device): + """3D gradient: x and y interior, z straddling boundary — only z grad is zero.""" + data = np.random.default_rng(8).standard_normal((8, 6, 10)).astype(np.float32) + g = _grad_3d(data, (2.3, 3.7, 0.1), device) + test.assertNotEqual(g[0], 0.0) + test.assertNotEqual(g[1], 0.0) + np.testing.assert_allclose(g[2], 0.0, atol=1e-6) + + +def test_texture3d_adj_uniform_zero(test, device): + """Gradient of a uniform volume is zero (no spatial variation to differentiate).""" + data = np.ones((8, 6, 10), dtype=np.float32) + g = _grad_3d(data, (2.3, 3.7, 1.1), device) + np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6) + + +def test_texture3d_adj_linear_x(test, device): + """3D gradient of signal linear in x: x-grad is 1/W, y and z grads are zero.""" + D, H, W = 8, 6, 10 + data = np.zeros((D, H, W), dtype=np.float32) + for x in range(W): + data[:, :, x] = x / W + g = _grad_3d(data, (4.5, 3.0, 3.0), device) + np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + np.testing.assert_allclose(g[2], 0.0, atol=1e-5) + + +def test_texture3d_adj_linear_y(test, device): + """3D gradient of signal linear in y: y-grad is 1/H, x and z grads are zero.""" + D, H, W = 8, 6, 10 + data = np.zeros((D, H, W), dtype=np.float32) + for y in range(H): + data[:, y, :] = y / H + g = _grad_3d(data, (4.5, 3.0, 3.0), device) + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 1.0 / H, atol=1e-5) + np.testing.assert_allclose(g[2], 0.0, atol=1e-5) + + +def test_texture3d_adj_linear_z(test, device): + """3D gradient of signal linear in z: z-grad is 1/D, x and y grads are zero.""" + D, H, W = 8, 6, 10 + data = np.zeros((D, H, W), dtype=np.float32) + for z in range(D): + data[z, :, :] = z / D + g = _grad_3d(data, (4.5, 3.0, 3.0), device) + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5) + + +def test_texture2d_adj_vec2f_linear_x(test, device): + """2D vec2f texture: x-grad matches scalar case for each channel independently.""" + H, W = 6, 10 + data = np.zeros((H, W, 2), dtype=np.float32) + for x in range(W): + data[:, x, 0] = x / W # channel 0: linear in x + data[:, x, 1] = (W - 1 - x) / W # channel 1: linear in x, reversed + + tex = wp.Texture2D(data, normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, device=device) + + @wp.kernel + def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f) + + pos = wp.array([wp.vec2f(4.5, 3.0)], dtype=wp.vec2f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_2d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device) + # seed gradient: both channels contribute equally + out.grad = wp.array([wp.vec2f(1.0, 1.0)], dtype=wp.vec2f, device=device) + tape.backward() + + g = pos.grad.numpy()[0] + # d(ch0)/dx = 1/W, d(ch1)/dx = -1/W, sum = 0 + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + + +def test_texture2d_adj_vec2f_channels_independent(test, device): + """2D vec2f texture: seeding only channel 0 gives channel-0-only gradient.""" + H, W = 6, 10 + data = np.zeros((H, W, 2), dtype=np.float32) + for x in range(W): + data[:, x, 0] = x / W # channel 0: linear in x + data[:, x, 1] = 0.0 # channel 1: constant + + tex = wp.Texture2D(data, normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, device=device) + + @wp.kernel + def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f) + + pos = wp.array([wp.vec2f(4.5, 3.0)], dtype=wp.vec2f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_2d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device) + # seed only channel 0 + out.grad = wp.array([wp.vec2f(1.0, 0.0)], dtype=wp.vec2f, device=device) + tape.backward() + + g = pos.grad.numpy()[0] + np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + + +def test_texture3d_adj_vec2f_linear_z(test, device): + """3D vec2f texture: z-grad is 1/D when only channel 0 is linear in z.""" + D, H, W = 8, 6, 10 + data = np.zeros((D, H, W, 2), dtype=np.float32) + for z in range(D): + data[z, :, :, 0] = z / D # channel 0: linear in z + data[z, :, :, 1] = 0.0 # channel 1: constant + + tex = wp.Texture3D(data, normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, device=device) + + @wp.kernel + def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=wp.vec2f)): + tid = wp.tid() + out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f) + + pos = wp.array([wp.vec3f(4.5, 3.0, 3.0)], dtype=wp.vec3f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_3d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.array([wp.vec2f(1.0, 0.0)], dtype=wp.vec2f, device=device) + tape.backward() + + g = pos.grad.numpy()[0] + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5) + # ============================================================================ # Test Class # ============================================================================ @@ -2853,6 +3177,45 @@ class TestTexture(unittest.TestCase): TestTexture, "test_texture_struct_both_members", test_texture_struct_both_members, devices=all_devices ) +# Adjoint +add_function_test( + TestTexture, "test_texture1d_adj_boundary_zero", test_texture1d_adj_boundary_zero, devices=all_devices +) +add_function_test( + TestTexture, "test_texture1d_adj_far_boundary_zero", test_texture1d_adj_far_boundary_zero, devices=all_devices +) +add_function_test(TestTexture, "test_texture1d_adj_closest_zero", test_texture1d_adj_closest_zero, devices=all_devices) +add_function_test( + TestTexture, "test_texture1d_adj_linear_signal", test_texture1d_adj_linear_signal, devices=all_devices +) +add_function_test( + TestTexture, "test_texture2d_adj_near_boundary_zero", test_texture2d_adj_near_boundary_zero, devices=all_devices +) +add_function_test( + TestTexture, "test_texture2d_adj_far_boundary_zero", test_texture2d_adj_far_boundary_zero, devices=all_devices +) +add_function_test( + TestTexture, "test_texture2d_adj_partial_boundary", test_texture2d_adj_partial_boundary, devices=all_devices +) +add_function_test(TestTexture, "test_texture2d_adj_linear_x", test_texture2d_adj_linear_x, devices=all_devices) +add_function_test(TestTexture, "test_texture2d_adj_linear_y", test_texture2d_adj_linear_y, devices=all_devices) +add_function_test( + TestTexture, "test_texture3d_adj_near_boundary_zero", test_texture3d_adj_near_boundary_zero, devices=all_devices +) +add_function_test( + TestTexture, "test_texture3d_adj_far_boundary_zero", test_texture3d_adj_far_boundary_zero, devices=all_devices +) +add_function_test( + TestTexture, "test_texture3d_adj_partial_boundary", test_texture3d_adj_partial_boundary, devices=all_devices +) +add_function_test(TestTexture, "test_texture3d_adj_uniform_zero", test_texture3d_adj_uniform_zero, devices=all_devices) +add_function_test(TestTexture, "test_texture3d_adj_linear_x", test_texture3d_adj_linear_x, devices=all_devices) +add_function_test(TestTexture, "test_texture3d_adj_linear_y", test_texture3d_adj_linear_y, devices=all_devices) +add_function_test(TestTexture, "test_texture3d_adj_linear_z", test_texture3d_adj_linear_z, devices=all_devices) +add_function_test(TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices) +add_function_test(TestTexture, "test_texture2d_adj_vec2f_channels_independent", test_texture2d_adj_vec2f_channels_independent, devices=all_devices) +add_function_test(TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices) + if __name__ == "__main__": unittest.main(verbosity=2) From 160515f996037b7d196a1ee5a05f94df6bbec096 Mon Sep 17 00:00:00 2001 From: Eric Shi Date: Wed, 18 Mar 2026 13:44:53 -0700 Subject: [PATCH 2/5] Update publications list with two new papers Add Kamino (GPU multi-body simulation) and ComFree-Sim (GPU-parallelized contact physics engine) to the 2026 publications. Signed-off-by: Eric Shi Signed-off-by: eigenvivek --- PUBLICATIONS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md index d69ffd1502..885a8f953f 100644 --- a/PUBLICATIONS.md +++ b/PUBLICATIONS.md @@ -8,6 +8,8 @@ pull request on GitHub or email a link to your arXiv preprint (preferred) or DOI ## 2026 - **Discovering neural cohesive zone laws from displacement fields**. *G. Barkoulis Gavris, W. Sun*. April 2026. [DOI:10.1016/j.cma.2026.118733](https://doi.org/10.1016/j.cma.2026.118733) +- **Kamino: GPU-based Massively Parallel Simulation of Multi-Body Systems with Challenging Topologies**. *V. Tsounis, G. Maloisel, C. Schumacher, R. Grandia, A. Serifi, D. Müller, C. Amevor, T. Widmer, M. Bächer*. March 2026. [arXiv:2603.16536](https://arxiv.org/abs/2603.16536) +- **ComFree-Sim: A GPU-Parallelized Analytical Contact Physics Engine for Scalable Contact-Rich Robotics Simulation and Control**. *C. Borse, Z. Xie, W. Huang, W. Jin*. March 2026. [arXiv:2603.12185](https://arxiv.org/abs/2603.12185) - **cuRoboV2: Dynamics-Aware Motion Generation with Depth-Fused Distance Fields for High-DoF Robots**. *B. Sundaralingam, A. Murali, S. Birchfield*. March 2026. [arXiv:2603.05493](https://arxiv.org/abs/2603.05493) - **GaussTwin: Unified Simulation and Correction with Gaussian Splatting for Robotic Digital Twins**. *Y. Cai, P. Jansonnie, C. de Farias, O. Arenz, J. Peters*. March 2026. [arXiv:2603.05108](https://arxiv.org/abs/2603.05108) - **X-Loco: Towards Generalist Humanoid Locomotion Control via Synergetic Policy Distillation**. *D. Wang, X. Wang, C. Zhang, J. Shi, Y. Zhao, C. Bai, X. Li*. March 2026. [arXiv:2603.03733](https://arxiv.org/abs/2603.03733) From d9a2f2316cea5db5fb2f0ce839ab22dc6b9ba157 Mon Sep 17 00:00:00 2001 From: eigenvivek Date: Wed, 18 Mar 2026 17:49:10 -0400 Subject: [PATCH 3/5] Restore two functions that somehow got deleted Signed-off-by: eigenvivek --- warp/_src/builtins.py | 22 +++++++++++++ warp/tests/cuda/test_texture.py | 56 +++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/warp/_src/builtins.py b/warp/_src/builtins.py index c1d783efa5..001501831c 100644 --- a/warp/_src/builtins.py +++ b/warp/_src/builtins.py @@ -9828,6 +9828,17 @@ def vector_assign_dispatch_func(input_types: Mapping[str, type], return_type: An group="Utility", ) +# Bool vector assign_inplace (bool is not part of Scalar) +add_builtin( + "assign_inplace", + input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any}, + value_type=None, + dispatch_func=vector_assign_dispatch_func, + hidden=True, + export=False, + group="Utility", +) + # implements quaternion[index] = value add_builtin( "assign_inplace", @@ -9866,6 +9877,17 @@ def vector_assign_copy_value_func(arg_types: Mapping[str, type], arg_values: Map group="Utility", ) +# Bool vector assign_copy (bool is not part of Scalar) +add_builtin( + "assign_copy", + input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any}, + value_func=vector_assign_copy_value_func, + dispatch_func=vector_assign_dispatch_func, + hidden=True, + export=False, + group="Utility", +) + # implements quaternion[index] = value, performs a copy internally if wp.config.enable_vector_component_overwrites is True add_builtin( "assign_copy", diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py index d686ae10cd..b008a6eafc 100644 --- a/warp/tests/cuda/test_texture.py +++ b/warp/tests/cuda/test_texture.py @@ -2836,12 +2836,16 @@ def test_texture2d_adj_vec2f_linear_x(test, device): H, W = 6, 10 data = np.zeros((H, W, 2), dtype=np.float32) for x in range(W): - data[:, x, 0] = x / W # channel 0: linear in x + data[:, x, 0] = x / W # channel 0: linear in x data[:, x, 1] = (W - 1 - x) / W # channel 1: linear in x, reversed - tex = wp.Texture2D(data, normalized_coords=False, - filter_mode=wp.TextureFilterMode.LINEAR, - address_mode=wp.TextureAddressMode.BORDER, device=device) + tex = wp.Texture2D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) @wp.kernel def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)): @@ -2868,12 +2872,16 @@ def test_texture2d_adj_vec2f_channels_independent(test, device): H, W = 6, 10 data = np.zeros((H, W, 2), dtype=np.float32) for x in range(W): - data[:, x, 0] = x / W # channel 0: linear in x - data[:, x, 1] = 0.0 # channel 1: constant + data[:, x, 0] = x / W # channel 0: linear in x + data[:, x, 1] = 0.0 # channel 1: constant - tex = wp.Texture2D(data, normalized_coords=False, - filter_mode=wp.TextureFilterMode.LINEAR, - address_mode=wp.TextureAddressMode.BORDER, device=device) + tex = wp.Texture2D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) @wp.kernel def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)): @@ -2899,12 +2907,16 @@ def test_texture3d_adj_vec2f_linear_z(test, device): D, H, W = 8, 6, 10 data = np.zeros((D, H, W, 2), dtype=np.float32) for z in range(D): - data[z, :, :, 0] = z / D # channel 0: linear in z - data[z, :, :, 1] = 0.0 # channel 1: constant + data[z, :, :, 0] = z / D # channel 0: linear in z + data[z, :, :, 1] = 0.0 # channel 1: constant - tex = wp.Texture3D(data, normalized_coords=False, - filter_mode=wp.TextureFilterMode.LINEAR, - address_mode=wp.TextureAddressMode.BORDER, device=device) + tex = wp.Texture3D( + data, + normalized_coords=False, + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) @wp.kernel def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=wp.vec2f)): @@ -2924,6 +2936,7 @@ def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.arr np.testing.assert_allclose(g[1], 0.0, atol=1e-5) np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5) + # ============================================================================ # Test Class # ============================================================================ @@ -3212,9 +3225,18 @@ class TestTexture(unittest.TestCase): add_function_test(TestTexture, "test_texture3d_adj_linear_x", test_texture3d_adj_linear_x, devices=all_devices) add_function_test(TestTexture, "test_texture3d_adj_linear_y", test_texture3d_adj_linear_y, devices=all_devices) add_function_test(TestTexture, "test_texture3d_adj_linear_z", test_texture3d_adj_linear_z, devices=all_devices) -add_function_test(TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices) -add_function_test(TestTexture, "test_texture2d_adj_vec2f_channels_independent", test_texture2d_adj_vec2f_channels_independent, devices=all_devices) -add_function_test(TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices) +add_function_test( + TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices +) +add_function_test( + TestTexture, + "test_texture2d_adj_vec2f_channels_independent", + test_texture2d_adj_vec2f_channels_independent, + devices=all_devices, +) +add_function_test( + TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices +) if __name__ == "__main__": From 057a3ebb810dc4c7782fae485fd1ac66d2d63087 Mon Sep 17 00:00:00 2001 From: eigenvivek Date: Wed, 18 Mar 2026 20:28:04 -0400 Subject: [PATCH 4/5] Add warnings for non-border modes Signed-off-by: eigenvivek --- warp/_src/texture.py | 27 ++++++++++++++++ warp/native/texture.h | 72 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 5 deletions(-) diff --git a/warp/_src/texture.py b/warp/_src/texture.py index 59b8e1a32e..886dcaae14 100644 --- a/warp/_src/texture.py +++ b/warp/_src/texture.py @@ -7,6 +7,7 @@ import ctypes import enum +import warnings from typing import TYPE_CHECKING, ClassVar import numpy as np @@ -176,6 +177,13 @@ class Texture: ``wp.float16``, and ``wp.float32`` data types. Unsigned integer textures are read as normalized floats in [0, 1]; signed integer textures are normalized to [-1, 1]; float types are returned as-is. + .. warning:: + **Automatic differentiation is only correct when all texture address modes are set to BORDER.** + Using ``wp.texture_sample()`` with ``requires_grad=True`` on textures with WRAP, CLAMP, or + MIRROR address modes will produce silent gradient errors. The gradient computation zeros out + when sampling positions straddle texture boundaries, which is correct for BORDER mode but + incorrect for other modes where the forward pass returns valid interpolated data. + This class should not be instantiated directly. A specific subclass should be used instead (:class:`Texture1D`, :class:`Texture2D`, or :class:`Texture3D`). @@ -275,6 +283,25 @@ def __init__( self._resolve_address_mode(address_mode, address_mode_w, 2) if ndim > 2 else TextureAddressMode.CLAMP ) + # Warn if using non-BORDER address modes (differentiation only supports BORDER) + non_border_modes = [] + if address_mode_u != TextureAddressMode.BORDER: + non_border_modes.append(f"U={TextureAddressMode(address_mode_u).name}") + if ndim > 1 and address_mode_v != TextureAddressMode.BORDER: + non_border_modes.append(f"V={TextureAddressMode(address_mode_v).name}") + if ndim > 2 and address_mode_w != TextureAddressMode.BORDER: + non_border_modes.append(f"W={TextureAddressMode(address_mode_w).name}") + + if non_border_modes: + warnings.warn( + f"Texture created with non-BORDER address mode(s): {', '.join(non_border_modes)}. " + f"Automatic differentiation (wp.texture_sample with requires_grad=True) only produces " + f"correct gradients when all address modes are BORDER. Non-BORDER modes will silently " + f"return incorrect gradients at texture boundaries.", + UserWarning, + stacklevel=2, + ) + # if an external CUDA array was given, infer texture shape and dtype from it if cuda_array: if not device.is_cuda: diff --git a/warp/native/texture.h b/warp/native/texture.h index ac49cecc79..e4f0ea2b73 100644 --- a/warp/native/texture.h +++ b/warp/native/texture.h @@ -857,11 +857,24 @@ template CUDA_CALLABLE T texture_sample(const texture3d_t& tex, flo return texture_sample_helper::sample_3d(tex, u, v, w); } -// Adjoints for texture sampling w.r.t. sampling coordinates. -// Gradients w.r.t. texture data are not supported; adj_tex is a no-op. -// On GPU, requires filter_mode and use_normalized_coords in the descriptor. -// Boundary behavior matches PyTorch grid_sample with padding_mode="border": -// gradient is zero when the sampling position straddles a volume boundary. +// ============================================================================ +// Texture Sampling Adjoints +// ============================================================================ +// +// IMPORTANT: Differentiation is only correct when all texture address modes +// are set to BORDER (WP_TEXTURE_ADDRESS_BORDER = 3). +// +// The gradient computation zeros out when sampling positions straddle texture +// boundaries, which is correct for BORDER mode (returns 0 outside bounds) but +// incorrect for WRAP/MIRROR/CLAMP modes where the forward pass returns valid +// interpolated data across boundaries. +// +// Using differentiation with WRAP (mode 0), CLAMP (mode 1), or MIRROR (mode 2) +// will silently produce incorrect gradients without error or warning. +// +// Future work: Implement proper gradient computation for all address modes. +// ============================================================================ + template CUDA_CALLABLE void adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& adj_u, const T& adj_ret) @@ -869,6 +882,23 @@ adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; +#ifndef NDEBUG +// Warning: This check is only active in debug builds +// Differentiation is only correct for BORDER address mode +#if !defined(__CUDA_ARCH__) + if (tex.tex != 0) { + const Texture* cpu_tex = (const Texture*)tex.tex; + if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER) { + printf( + "WARNING: texture_sample adjoint may produce incorrect gradients. " + "Address mode is %d but differentiation only supports BORDER mode (3).\n", + cpu_tex->address_mode_u + ); + } + } +#endif +#endif + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; #if defined(__CUDA_ARCH__) @@ -927,6 +957,22 @@ CUDA_CALLABLE void adj_texture_sample( if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; +#ifndef NDEBUG +#if !defined(__CUDA_ARCH__) + if (tex.tex != 0) { + const Texture* cpu_tex = (const Texture*)tex.tex; + if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER + || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER) { + printf( + "WARNING: texture_sample adjoint may produce incorrect gradients. " + "Address modes are (%d, %d) but differentiation only supports BORDER mode (3).\n", + cpu_tex->address_mode_u, cpu_tex->address_mode_v + ); + } + } +#endif +#endif + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; @@ -1034,6 +1080,22 @@ CUDA_CALLABLE void adj_texture_sample( if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; +#ifndef NDEBUG +#if !defined(__CUDA_ARCH__) + if (tex.tex != 0) { + const Texture* cpu_tex = (const Texture*)tex.tex; + if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER + || cpu_tex->address_mode_w != WP_TEXTURE_ADDRESS_BORDER) { + printf( + "WARNING: texture_sample adjoint may produce incorrect gradients. " + "Address modes are (%d, %d, %d) but differentiation only supports BORDER mode (3).\n", + cpu_tex->address_mode_u, cpu_tex->address_mode_v, cpu_tex->address_mode_w + ); + } + } +#endif +#endif + float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; float gtz_mult = tex.use_normalized_coords ? (float)tex.depth : 1.0f; From 1d30e722659e66d8b70ac0ce1ae63141dfa337c6 Mon Sep 17 00:00:00 2001 From: eigenvivek Date: Wed, 18 Mar 2026 21:41:39 -0400 Subject: [PATCH 5/5] Update Signed-off-by: eigenvivek --- PUBLICATIONS.md | 2 - warp/_src/texture.py | 83 ++++++++++++------- warp/native/texture.h | 129 +++++++++++++++++++----------- warp/tests/cuda/test_texture.py | 137 ++++++++++++++++++++++++++++++++ 4 files changed, 275 insertions(+), 76 deletions(-) diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md index 885a8f953f..d69ffd1502 100644 --- a/PUBLICATIONS.md +++ b/PUBLICATIONS.md @@ -8,8 +8,6 @@ pull request on GitHub or email a link to your arXiv preprint (preferred) or DOI ## 2026 - **Discovering neural cohesive zone laws from displacement fields**. *G. Barkoulis Gavris, W. Sun*. April 2026. [DOI:10.1016/j.cma.2026.118733](https://doi.org/10.1016/j.cma.2026.118733) -- **Kamino: GPU-based Massively Parallel Simulation of Multi-Body Systems with Challenging Topologies**. *V. Tsounis, G. Maloisel, C. Schumacher, R. Grandia, A. Serifi, D. Müller, C. Amevor, T. Widmer, M. Bächer*. March 2026. [arXiv:2603.16536](https://arxiv.org/abs/2603.16536) -- **ComFree-Sim: A GPU-Parallelized Analytical Contact Physics Engine for Scalable Contact-Rich Robotics Simulation and Control**. *C. Borse, Z. Xie, W. Huang, W. Jin*. March 2026. [arXiv:2603.12185](https://arxiv.org/abs/2603.12185) - **cuRoboV2: Dynamics-Aware Motion Generation with Depth-Fused Distance Fields for High-DoF Robots**. *B. Sundaralingam, A. Murali, S. Birchfield*. March 2026. [arXiv:2603.05493](https://arxiv.org/abs/2603.05493) - **GaussTwin: Unified Simulation and Correction with Gaussian Splatting for Robotic Digital Twins**. *Y. Cai, P. Jansonnie, C. de Farias, O. Arenz, J. Peters*. March 2026. [arXiv:2603.05108](https://arxiv.org/abs/2603.05108) - **X-Loco: Towards Generalist Humanoid Locomotion Control via Synergetic Policy Distillation**. *D. Wang, X. Wang, C. Zhang, J. Shi, Y. Zhao, C. Bai, X. Li*. March 2026. [arXiv:2603.03733](https://arxiv.org/abs/2603.03733) diff --git a/warp/_src/texture.py b/warp/_src/texture.py index 886dcaae14..773d4ad483 100644 --- a/warp/_src/texture.py +++ b/warp/_src/texture.py @@ -7,7 +7,6 @@ import ctypes import enum -import warnings from typing import TYPE_CHECKING, ClassVar import numpy as np @@ -83,14 +82,16 @@ class texture1d_t(ctypes.Structure): ("num_channels", ctypes.c_int32), ("filter_mode", ctypes.c_int32), ("use_normalized_coords", ctypes.c_int32), + ("address_mode_u", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1): + def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1, address_mode_u=0): self.tex = tex self.width = width self.num_channels = num_channels self.filter_mode = filter_mode self.use_normalized_coords = use_normalized_coords + self.address_mode_u = address_mode_u class texture2d_t(ctypes.Structure): @@ -106,15 +107,29 @@ class texture2d_t(ctypes.Structure): ("num_channels", ctypes.c_int32), ("filter_mode", ctypes.c_int32), ("use_normalized_coords", ctypes.c_int32), + ("address_mode_u", ctypes.c_int32), + ("address_mode_v", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, height=0, num_channels=0, filter_mode=0, use_normalized_coords=1): + def __init__( + self, + tex=0, + width=0, + height=0, + num_channels=0, + filter_mode=0, + use_normalized_coords=1, + address_mode_u=0, + address_mode_v=0, + ): self.tex = tex self.width = width self.height = height self.num_channels = num_channels self.filter_mode = filter_mode self.use_normalized_coords = use_normalized_coords + self.address_mode_u = address_mode_u + self.address_mode_v = address_mode_v class texture3d_t(ctypes.Structure): @@ -131,9 +146,24 @@ class texture3d_t(ctypes.Structure): ("num_channels", ctypes.c_int32), ("filter_mode", ctypes.c_int32), ("use_normalized_coords", ctypes.c_int32), + ("address_mode_u", ctypes.c_int32), + ("address_mode_v", ctypes.c_int32), + ("address_mode_w", ctypes.c_int32), ) - def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mode=0, use_normalized_coords=1): + def __init__( + self, + tex=0, + width=0, + height=0, + depth=0, + num_channels=0, + filter_mode=0, + use_normalized_coords=1, + address_mode_u=0, + address_mode_v=0, + address_mode_w=0, + ): self.tex = tex self.width = width self.height = height @@ -141,6 +171,9 @@ def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mod self.num_channels = num_channels self.filter_mode = filter_mode self.use_normalized_coords = use_normalized_coords + self.address_mode_u = address_mode_u + self.address_mode_v = address_mode_v + self.address_mode_w = address_mode_w class cuda_array_desc_t(ctypes.Structure): @@ -178,11 +211,18 @@ class Texture: floats in [0, 1]; signed integer textures are normalized to [-1, 1]; float types are returned as-is. .. warning:: - **Automatic differentiation is only correct when all texture address modes are set to BORDER.** - Using ``wp.texture_sample()`` with ``requires_grad=True`` on textures with WRAP, CLAMP, or - MIRROR address modes will produce silent gradient errors. The gradient computation zeros out - when sampling positions straddle texture boundaries, which is correct for BORDER mode but - incorrect for other modes where the forward pass returns valid interpolated data. + **Automatic differentiation with LINEAR filtering is only correct when all texture + address modes are set to BORDER.** + + Using ``wp.texture_sample()`` with ``requires_grad=True``, ``filter_mode=LINEAR``, + and address modes other than BORDER (WRAP/CLAMP/MIRROR) will produce silent gradient + errors at texture boundaries. The gradient computation assumes BORDER behavior + (returns zero outside bounds). + + If you need automatic differentiation with LINEAR filtering, create textures with + ``address_mode=wp.TextureAddressMode.BORDER``. CLOSEST filtering does not have this + limitation (gradients are always zero). + This class should not be instantiated directly. A specific subclass should be used instead (:class:`Texture1D`, :class:`Texture2D`, or :class:`Texture3D`). @@ -283,25 +323,6 @@ def __init__( self._resolve_address_mode(address_mode, address_mode_w, 2) if ndim > 2 else TextureAddressMode.CLAMP ) - # Warn if using non-BORDER address modes (differentiation only supports BORDER) - non_border_modes = [] - if address_mode_u != TextureAddressMode.BORDER: - non_border_modes.append(f"U={TextureAddressMode(address_mode_u).name}") - if ndim > 1 and address_mode_v != TextureAddressMode.BORDER: - non_border_modes.append(f"V={TextureAddressMode(address_mode_v).name}") - if ndim > 2 and address_mode_w != TextureAddressMode.BORDER: - non_border_modes.append(f"W={TextureAddressMode(address_mode_w).name}") - - if non_border_modes: - warnings.warn( - f"Texture created with non-BORDER address mode(s): {', '.join(non_border_modes)}. " - f"Automatic differentiation (wp.texture_sample with requires_grad=True) only produces " - f"correct gradients when all address modes are BORDER. Non-BORDER modes will silently " - f"return incorrect gradients at texture boundaries.", - UserWarning, - stacklevel=2, - ) - # if an external CUDA array was given, infer texture shape and dtype from it if cuda_array: if not device.is_cuda: @@ -1001,6 +1022,7 @@ def __ctype__(self) -> texture1d_t: self._num_channels, int(self._filter_mode), int(self._normalized_coords), + int(self._address_mode_u), ) @@ -1085,6 +1107,8 @@ def __ctype__(self) -> texture2d_t: self._num_channels, int(self._filter_mode), int(self._normalized_coords), + int(self._address_mode_u), + int(self._address_mode_v), ) @@ -1174,6 +1198,9 @@ def __ctype__(self) -> texture3d_t: self._num_channels, int(self._filter_mode), int(self._normalized_coords), + int(self._address_mode_u), + int(self._address_mode_v), + int(self._address_mode_w), ) diff --git a/warp/native/texture.h b/warp/native/texture.h index e4f0ea2b73..90fc6e0f00 100644 --- a/warp/native/texture.h +++ b/warp/native/texture.h @@ -157,6 +157,7 @@ struct texture1d_t { int32 num_channels; int32 filter_mode; int32 use_normalized_coords; + int32 address_mode_u; CUDA_CALLABLE inline texture1d_t() : tex(0) @@ -164,17 +165,24 @@ struct texture1d_t { , num_channels(0) , filter_mode(0) , use_normalized_coords(1) + , address_mode_u(0) { } CUDA_CALLABLE inline texture1d_t( - uint64 tex, int32 width, int32 num_channels, int32 filter_mode, int32 use_normalized_coords + uint64 tex, + int32 width, + int32 num_channels, + int32 filter_mode, + int32 use_normalized_coords, + int32 address_mode_u ) : tex(tex) , width(width) , num_channels(num_channels) , filter_mode(filter_mode) , use_normalized_coords(use_normalized_coords) + , address_mode_u(address_mode_u) { } }; @@ -186,6 +194,8 @@ struct texture2d_t { int32 num_channels; int32 filter_mode; int32 use_normalized_coords; + int32 address_mode_u; + int32 address_mode_v; CUDA_CALLABLE inline texture2d_t() : tex(0) @@ -194,11 +204,20 @@ struct texture2d_t { , num_channels(0) , filter_mode(0) , use_normalized_coords(1) + , address_mode_u(0) + , address_mode_v(0) { } CUDA_CALLABLE inline texture2d_t( - uint64 tex, int32 width, int32 height, int32 num_channels, int32 filter_mode, int32 use_normalized_coords + uint64 tex, + int32 width, + int32 height, + int32 num_channels, + int32 filter_mode, + int32 use_normalized_coords, + int32 address_mode_u, + int32 address_mode_v ) : tex(tex) , width(width) @@ -206,6 +225,8 @@ struct texture2d_t { , num_channels(num_channels) , filter_mode(filter_mode) , use_normalized_coords(use_normalized_coords) + , address_mode_u(address_mode_u) + , address_mode_v(address_mode_v) { } }; @@ -218,6 +239,9 @@ struct texture3d_t { int32 num_channels; int32 filter_mode; int32 use_normalized_coords; + int32 address_mode_u; + int32 address_mode_v; + int32 address_mode_w; CUDA_CALLABLE inline texture3d_t() : tex(0) @@ -227,6 +251,9 @@ struct texture3d_t { , num_channels(0) , filter_mode(0) , use_normalized_coords(1) + , address_mode_u(0) + , address_mode_v(0) + , address_mode_w(0) { } @@ -237,7 +264,10 @@ struct texture3d_t { int32 depth, int32 num_channels, int32 filter_mode, - int32 use_normalized_coords + int32 use_normalized_coords, + int32 address_mode_u, + int32 address_mode_v, + int32 address_mode_w ) : tex(tex) , width(width) @@ -246,6 +276,9 @@ struct texture3d_t { , num_channels(num_channels) , filter_mode(filter_mode) , use_normalized_coords(use_normalized_coords) + , address_mode_u(address_mode_u) + , address_mode_v(address_mode_v) + , address_mode_w(address_mode_w) { } }; @@ -882,22 +915,23 @@ adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; -#ifndef NDEBUG -// Warning: This check is only active in debug builds -// Differentiation is only correct for BORDER address mode -#if !defined(__CUDA_ARCH__) - if (tex.tex != 0) { - const Texture* cpu_tex = (const Texture*)tex.tex; - if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER) { - printf( - "WARNING: texture_sample adjoint may produce incorrect gradients. " - "Address mode is %d but differentiation only supports BORDER mode (3).\n", - cpu_tex->address_mode_u - ); - } - } -#endif + // Check address mode compatibility with differentiation + if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER) { +#if defined(__CUDA_ARCH__) + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address_mode_u=%d. Gradients will be incorrect.\n", + tex.address_mode_u + ); +#else + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address_mode_u=%d. Gradients will be incorrect.\n", + tex.address_mode_u + ); #endif + return; // Return zero gradient + } float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; @@ -957,21 +991,22 @@ CUDA_CALLABLE void adj_texture_sample( if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; -#ifndef NDEBUG -#if !defined(__CUDA_ARCH__) - if (tex.tex != 0) { - const Texture* cpu_tex = (const Texture*)tex.tex; - if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER - || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER) { - printf( - "WARNING: texture_sample adjoint may produce incorrect gradients. " - "Address modes are (%d, %d) but differentiation only supports BORDER mode (3).\n", - cpu_tex->address_mode_u, cpu_tex->address_mode_v - ); - } - } -#endif + if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER || tex.address_mode_v != WP_TEXTURE_ADDRESS_BORDER) { +#if defined(__CUDA_ARCH__) + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address modes (%d, %d). Gradients will be incorrect.\n", + tex.address_mode_u, tex.address_mode_v + ); +#else + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address modes (%d, %d). Gradients will be incorrect.\n", + tex.address_mode_u, tex.address_mode_v + ); #endif + return; + } float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; @@ -1080,21 +1115,23 @@ CUDA_CALLABLE void adj_texture_sample( if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST) return; -#ifndef NDEBUG -#if !defined(__CUDA_ARCH__) - if (tex.tex != 0) { - const Texture* cpu_tex = (const Texture*)tex.tex; - if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER - || cpu_tex->address_mode_w != WP_TEXTURE_ADDRESS_BORDER) { - printf( - "WARNING: texture_sample adjoint may produce incorrect gradients. " - "Address modes are (%d, %d, %d) but differentiation only supports BORDER mode (3).\n", - cpu_tex->address_mode_u, cpu_tex->address_mode_v, cpu_tex->address_mode_w - ); - } - } -#endif + if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER || tex.address_mode_v != WP_TEXTURE_ADDRESS_BORDER + || tex.address_mode_w != WP_TEXTURE_ADDRESS_BORDER) { +#if defined(__CUDA_ARCH__) + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address modes (%d, %d, %d). Gradients will be incorrect.\n", + tex.address_mode_u, tex.address_mode_v, tex.address_mode_w + ); +#else + printf( + "ERROR: texture_sample gradient computation requires BORDER address mode. " + "Texture has address modes (%d, %d, %d). Gradients will be incorrect.\n", + tex.address_mode_u, tex.address_mode_v, tex.address_mode_w + ); #endif + return; + } float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f; float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f; diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py index b008a6eafc..46780f1056 100644 --- a/warp/tests/cuda/test_texture.py +++ b/warp/tests/cuda/test_texture.py @@ -2677,6 +2677,63 @@ def _grad_3d(data, coord, device): return pos.grad.numpy()[0] +def _grad_1d_normalized(data, u_normalized, device): + """Helper for 1D gradient with normalized coordinates.""" + tex = wp.Texture1D( + data, + normalized_coords=True, # Use default normalized coordinates + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([u_normalized], dtype=float, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + +def _grad_2d_normalized(data, coord_normalized, device): + """Helper for 2D gradient with normalized coordinates.""" + tex = wp.Texture2D( + data, + normalized_coords=True, # Use default normalized coordinates + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([wp.vec2f(*coord_normalized)], dtype=wp.vec2f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_2d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + +def _grad_3d_normalized(data, coord_normalized, device): + """Helper for 3D gradient with normalized coordinates.""" + tex = wp.Texture3D( + data, + normalized_coords=True, # Use default normalized coordinates + filter_mode=wp.TextureFilterMode.LINEAR, + address_mode=wp.TextureAddressMode.BORDER, + device=device, + ) + pos = wp.array([wp.vec3f(*coord_normalized)], dtype=wp.vec3f, requires_grad=True, device=device) + out = wp.zeros(1, dtype=float, requires_grad=True, device=device) + tape = wp.Tape() + with tape: + wp.launch(sample_3d, dim=1, inputs=[tex, pos], outputs=[out], device=device) + out.grad = wp.ones(1, dtype=float, device=device) + tape.backward() + return pos.grad.numpy()[0] + + def test_texture1d_adj_boundary_zero(test, device): """Gradient is zero when sampling position straddles the near boundary.""" data = np.random.default_rng(0).standard_normal(16).astype(np.float32) @@ -2937,6 +2994,68 @@ def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.arr np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5) +def test_texture1d_adj_normalized_boundary(test, device): + """1D normalized coords: gradient is zero at boundary (u ≈ 0.0 or u ≈ 1.0).""" + data = np.random.default_rng(10).standard_normal(16).astype(np.float32) + g_near = _grad_1d_normalized(data, 0.01, device) + g_far = _grad_1d_normalized(data, 0.99, device) + np.testing.assert_allclose(g_near, 0.0, atol=1e-6) + np.testing.assert_allclose(g_far, 0.0, atol=1e-6) + + +def test_texture1d_adj_normalized_linear(test, device): + """1D normalized coords: gradient of linear signal.""" + W = 16 + data = np.arange(W, dtype=np.float32) / W + # With normalized coords, d(value)/d(u_norm) = d(value)/d(u_texel) * d(u_texel)/d(u_norm) + # = (1/W) * W = 1.0 + g = _grad_1d_normalized(data, 0.5, device) + np.testing.assert_allclose(g, 1.0, atol=1e-4) + + +def test_texture2d_adj_normalized_boundary(test, device): + """2D normalized coords: gradient is zero at boundaries.""" + data = np.random.default_rng(11).standard_normal((8, 10)).astype(np.float32) + g_near = _grad_2d_normalized(data, (0.01, 0.01), device) + g_far = _grad_2d_normalized(data, (0.99, 0.99), device) + np.testing.assert_allclose(g_near, [0.0, 0.0], atol=1e-6) + np.testing.assert_allclose(g_far, [0.0, 0.0], atol=1e-6) + + +def test_texture2d_adj_normalized_linear_x(test, device): + """2D normalized coords: x-gradient of signal linear in x.""" + H, W = 6, 10 + data = np.zeros((H, W), dtype=np.float32) + for x in range(W): + data[:, x] = x / W + g = _grad_2d_normalized(data, (0.5, 0.5), device) + # With normalized coords: d(value)/d(u_norm) = 1.0 + np.testing.assert_allclose(g[0], 1.0, atol=1e-4) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + + +def test_texture3d_adj_normalized_boundary(test, device): + """3D normalized coords: gradient is zero at boundaries.""" + data = np.random.default_rng(12).standard_normal((8, 6, 10)).astype(np.float32) + g_near = _grad_3d_normalized(data, (0.01, 0.01, 0.01), device) + g_far = _grad_3d_normalized(data, (0.99, 0.99, 0.99), device) + np.testing.assert_allclose(g_near, [0.0, 0.0, 0.0], atol=1e-6) + np.testing.assert_allclose(g_far, [0.0, 0.0, 0.0], atol=1e-6) + + +def test_texture3d_adj_normalized_linear_z(test, device): + """3D normalized coords: z-gradient of signal linear in z.""" + D, H, W = 8, 6, 10 + data = np.zeros((D, H, W), dtype=np.float32) + for z in range(D): + data[z, :, :] = z / D + g = _grad_3d_normalized(data, (0.5, 0.5, 0.5), device) + # With normalized coords: d(value)/d(w_norm) = 1.0 + np.testing.assert_allclose(g[0], 0.0, atol=1e-5) + np.testing.assert_allclose(g[1], 0.0, atol=1e-5) + np.testing.assert_allclose(g[2], 1.0, atol=1e-4) + + # ============================================================================ # Test Class # ============================================================================ @@ -3237,6 +3356,24 @@ class TestTexture(unittest.TestCase): add_function_test( TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices ) +add_function_test( + TestTexture, "test_texture1d_adj_normalized_boundary", test_texture1d_adj_normalized_boundary, devices=all_devices +) +add_function_test( + TestTexture, "test_texture1d_adj_normalized_linear", test_texture1d_adj_normalized_linear, devices=all_devices +) +add_function_test( + TestTexture, "test_texture2d_adj_normalized_boundary", test_texture2d_adj_normalized_boundary, devices=all_devices +) +add_function_test( + TestTexture, "test_texture2d_adj_normalized_linear_x", test_texture2d_adj_normalized_linear_x, devices=all_devices +) +add_function_test( + TestTexture, "test_texture3d_adj_normalized_boundary", test_texture3d_adj_normalized_boundary, devices=all_devices +) +add_function_test( + TestTexture, "test_texture3d_adj_normalized_linear_z", test_texture3d_adj_normalized_linear_z, devices=all_devices +) if __name__ == "__main__":