From a510eb0651b8c25688cdb8f8ef0a38f9ca868a95 Mon Sep 17 00:00:00 2001
From: eigenvivek <vivekg@mit.edu>
Date: Wed, 18 Mar 2026 17:28:15 -0400
Subject: [PATCH 1/5] Add adjoints for `texture_sample`
 ([GH-1301](https://github.com/NVIDIA/warp/issues/1301)).

Signed-off-by: eigenvivek <vivekg@mit.edu>
---
 warp/_src/builtins.py           |  32 +--
 warp/_src/texture.py            |  45 +++-
 warp/native/texture.h           | 349 +++++++++++++++++++++++++++++-
 warp/tests/cuda/test_texture.py | 363 ++++++++++++++++++++++++++++++++
 4 files changed, 747 insertions(+), 42 deletions(-)

diff --git a/warp/_src/builtins.py b/warp/_src/builtins.py
index e068556a67..c1d783efa5 100644
--- a/warp/_src/builtins.py
+++ b/warp/_src/builtins.py
@@ -7809,7 +7809,7 @@ def texture_sample_1d_dispatch_func(input_types: Mapping[str, type], return_type
         The sampled value of the specified ``dtype``.
 
     Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""",
-    is_differentiable=False,
+    is_differentiable=True,
 )
 
 
@@ -7856,7 +7856,7 @@ def texture_sample_2d_dispatch_func(input_types: Mapping[str, type], return_type
         The sampled value of the specified ``dtype``.
 
     Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""",
-    is_differentiable=False,
+    is_differentiable=True,
 )
 
 # texture_sample for 2D textures with separate u, v coordinates
@@ -7886,7 +7886,7 @@ def texture_sample_2d_dispatch_func(input_types: Mapping[str, type], return_type
         The sampled value of the specified ``dtype``.
 
     Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""",
-    is_differentiable=False,
+    is_differentiable=True,
 )
 
 
@@ -7933,7 +7933,7 @@ def texture_sample_3d_dispatch_func(input_types: Mapping[str, type], return_type
         The sampled value of the specified ``dtype``.
 
     Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""",
-    is_differentiable=False,
+    is_differentiable=True,
 )
 
 # texture_sample for 3D textures with separate u, v, w coordinates
@@ -7965,7 +7965,7 @@ def texture_sample_3d_dispatch_func(input_types: Mapping[str, type], return_type
         The sampled value of the specified ``dtype``.
 
     Filtering mode is :attr:`warp.TextureFilterMode.CLOSEST` or :attr:`warp.TextureFilterMode.LINEAR`.""",
-    is_differentiable=False,
+    is_differentiable=True,
 )
 
 
@@ -9828,17 +9828,6 @@ def vector_assign_dispatch_func(input_types: Mapping[str, type], return_type: An
     group="Utility",
 )
 
-# Bool vector assign_inplace (bool is not part of Scalar)
-add_builtin(
-    "assign_inplace",
-    input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any},
-    value_type=None,
-    dispatch_func=vector_assign_dispatch_func,
-    hidden=True,
-    export=False,
-    group="Utility",
-)
-
 # implements quaternion[index] = value
 add_builtin(
     "assign_inplace",
@@ -9877,17 +9866,6 @@ def vector_assign_copy_value_func(arg_types: Mapping[str, type], arg_values: Map
     group="Utility",
 )
 
-# Bool vector assign_copy (bool is not part of Scalar)
-add_builtin(
-    "assign_copy",
-    input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any},
-    value_func=vector_assign_copy_value_func,
-    dispatch_func=vector_assign_dispatch_func,
-    hidden=True,
-    export=False,
-    group="Utility",
-)
-
 # implements quaternion[index] = value, performs a copy internally if wp.config.enable_vector_component_overwrites is True
 add_builtin(
     "assign_copy",
diff --git a/warp/_src/texture.py b/warp/_src/texture.py
index bd77344c34..59b8e1a32e 100644
--- a/warp/_src/texture.py
+++ b/warp/_src/texture.py
@@ -80,12 +80,16 @@ class texture1d_t(ctypes.Structure):
         ("tex", ctypes.c_uint64),
         ("width", ctypes.c_int32),
         ("num_channels", ctypes.c_int32),
+        ("filter_mode", ctypes.c_int32),
+        ("use_normalized_coords", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, num_channels=0):
+    def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
         self.tex = tex
         self.width = width
         self.num_channels = num_channels
+        self.filter_mode = filter_mode
+        self.use_normalized_coords = use_normalized_coords
 
 
 class texture2d_t(ctypes.Structure):
@@ -99,13 +103,17 @@ class texture2d_t(ctypes.Structure):
         ("width", ctypes.c_int32),
         ("height", ctypes.c_int32),
         ("num_channels", ctypes.c_int32),
+        ("filter_mode", ctypes.c_int32),
+        ("use_normalized_coords", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, height=0, num_channels=0):
+    def __init__(self, tex=0, width=0, height=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
         self.tex = tex
         self.width = width
         self.height = height
         self.num_channels = num_channels
+        self.filter_mode = filter_mode
+        self.use_normalized_coords = use_normalized_coords
 
 
 class texture3d_t(ctypes.Structure):
@@ -120,14 +128,18 @@ class texture3d_t(ctypes.Structure):
         ("height", ctypes.c_int32),
         ("depth", ctypes.c_int32),
         ("num_channels", ctypes.c_int32),
+        ("filter_mode", ctypes.c_int32),
+        ("use_normalized_coords", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0):
+    def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
         self.tex = tex
         self.width = width
         self.height = height
         self.depth = depth
         self.num_channels = num_channels
+        self.filter_mode = filter_mode
+        self.use_normalized_coords = use_normalized_coords
 
 
 class cuda_array_desc_t(ctypes.Structure):
@@ -956,7 +968,13 @@ def __ctype__(self) -> texture1d_t:
         """Return the ctypes structure for passing to kernels."""
         if self._tex_handle == 0:
             raise RuntimeError("Texture was created with data=None but never initialized.")
-        return texture1d_t(self._tex_handle, self._width, self._num_channels)
+        return texture1d_t(
+            self._tex_handle,
+            self._width,
+            self._num_channels,
+            int(self._filter_mode),
+            int(self._normalized_coords),
+        )
 
 
 class Texture2D(Texture):
@@ -1033,7 +1051,14 @@ def __ctype__(self) -> texture2d_t:
         """Return the ctypes structure for passing to kernels."""
         if self._tex_handle == 0:
             raise RuntimeError("Texture was created with data=None but never initialized.")
-        return texture2d_t(self._tex_handle, self._width, self._height, self._num_channels)
+        return texture2d_t(
+            self._tex_handle,
+            self._width,
+            self._height,
+            self._num_channels,
+            int(self._filter_mode),
+            int(self._normalized_coords),
+        )
 
 
 class Texture3D(Texture):
@@ -1114,7 +1139,15 @@ def __ctype__(self) -> texture3d_t:
         """Return the ctypes structure for passing to kernels."""
         if self._tex_handle == 0:
             raise RuntimeError("Texture was created with data=None but never initialized.")
-        return texture3d_t(self._tex_handle, self._width, self._height, self._depth, self._num_channels)
+        return texture3d_t(
+            self._tex_handle,
+            self._width,
+            self._height,
+            self._depth,
+            self._num_channels,
+            int(self._filter_mode),
+            int(self._normalized_coords),
+        )
 
 
 class TextureResourceFlags(enum.IntEnum):
diff --git a/warp/native/texture.h b/warp/native/texture.h
index 7bd5c27f81..ac49cecc79 100644
--- a/warp/native/texture.h
+++ b/warp/native/texture.h
@@ -155,18 +155,26 @@ struct texture1d_t {
     uint64 tex;  // CUtexObject handle (GPU) or Texture* (CPU)
     int32 width;
     int32 num_channels;
+    int32 filter_mode;
+    int32 use_normalized_coords;
 
     CUDA_CALLABLE inline texture1d_t()
         : tex(0)
         , width(0)
         , num_channels(0)
+        , filter_mode(0)
+        , use_normalized_coords(1)
     {
     }
 
-    CUDA_CALLABLE inline texture1d_t(uint64 tex, int32 width, int32 num_channels)
+    CUDA_CALLABLE inline texture1d_t(
+        uint64 tex, int32 width, int32 num_channels, int32 filter_mode, int32 use_normalized_coords
+    )
         : tex(tex)
         , width(width)
         , num_channels(num_channels)
+        , filter_mode(filter_mode)
+        , use_normalized_coords(use_normalized_coords)
     {
     }
 };
@@ -176,20 +184,28 @@ struct texture2d_t {
     int32 width;
     int32 height;
     int32 num_channels;
+    int32 filter_mode;
+    int32 use_normalized_coords;
 
     CUDA_CALLABLE inline texture2d_t()
         : tex(0)
         , width(0)
         , height(0)
         , num_channels(0)
+        , filter_mode(0)
+        , use_normalized_coords(1)
     {
     }
 
-    CUDA_CALLABLE inline texture2d_t(uint64 tex, int32 width, int32 height, int32 num_channels)
+    CUDA_CALLABLE inline texture2d_t(
+        uint64 tex, int32 width, int32 height, int32 num_channels, int32 filter_mode, int32 use_normalized_coords
+    )
         : tex(tex)
         , width(width)
         , height(height)
         , num_channels(num_channels)
+        , filter_mode(filter_mode)
+        , use_normalized_coords(use_normalized_coords)
     {
     }
 };
@@ -200,6 +216,8 @@ struct texture3d_t {
     int32 height;
     int32 depth;
     int32 num_channels;
+    int32 filter_mode;
+    int32 use_normalized_coords;
 
     CUDA_CALLABLE inline texture3d_t()
         : tex(0)
@@ -207,15 +225,27 @@ struct texture3d_t {
         , height(0)
         , depth(0)
         , num_channels(0)
+        , filter_mode(0)
+        , use_normalized_coords(1)
     {
     }
 
-    CUDA_CALLABLE inline texture3d_t(uint64 tex, int32 width, int32 height, int32 depth, int32 num_channels)
+    CUDA_CALLABLE inline texture3d_t(
+        uint64 tex,
+        int32 width,
+        int32 height,
+        int32 depth,
+        int32 num_channels,
+        int32 filter_mode,
+        int32 use_normalized_coords
+    )
         : tex(tex)
         , width(width)
         , height(height)
         , depth(depth)
         , num_channels(num_channels)
+        , filter_mode(filter_mode)
+        , use_normalized_coords(use_normalized_coords)
     {
     }
 };
@@ -658,6 +688,12 @@ template <> struct texture_sample_helper<float> {
     }
 
     static CUDA_CALLABLE float zero() { return 0.0f; }
+
+#if defined(__CUDA_ARCH__)
+    static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c) { return tex1D<float>(t, u); }
+    static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v, int c) { return tex2D<float>(t, u, v); }
+    static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v, float w, int c) { return tex3D<float>(t, u, v, w); }
+#endif
 };
 
 template <> struct texture_sample_helper<vec2f> {
@@ -701,6 +737,24 @@ template <> struct texture_sample_helper<vec2f> {
     }
 
     static CUDA_CALLABLE vec2f zero() { return vec2f(0.0f, 0.0f); }
+
+#if defined(__CUDA_ARCH__)
+    static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c)
+    {
+        float2 v = tex1D<float2>(t, u);
+        return c == 0 ? v.x : v.y;
+    }
+    static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v_, int c)
+    {
+        float2 v = tex2D<float2>(t, u, v_);
+        return c == 0 ? v.x : v.y;
+    }
+    static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v_, float w, int c)
+    {
+        float2 v = tex3D<float2>(t, u, v_, w);
+        return c == 0 ? v.x : v.y;
+    }
+#endif
 };
 
 template <> struct texture_sample_helper<vec4f> {
@@ -753,6 +807,24 @@ template <> struct texture_sample_helper<vec4f> {
     }
 
     static CUDA_CALLABLE vec4f zero() { return vec4f(0.0f, 0.0f, 0.0f, 0.0f); }
+
+#if defined(__CUDA_ARCH__)
+    static CUDA_CALLABLE float fetch_1d(uint64 t, float u, int c)
+    {
+        float4 v = tex1D<float4>(t, u);
+        return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w;
+    }
+    static CUDA_CALLABLE float fetch_2d(uint64 t, float u, float v_, int c)
+    {
+        float4 v = tex2D<float4>(t, u, v_);
+        return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w;
+    }
+    static CUDA_CALLABLE float fetch_3d(uint64 t, float u, float v_, float w, int c)
+    {
+        float4 v = tex3D<float4>(t, u, v_, w);
+        return c == 0 ? v.x : c == 1 ? v.y : c == 2 ? v.z : v.w;
+    }
+#endif
 };
 
 // 1D texture sampling with scalar coordinate
@@ -785,19 +857,66 @@ template <typename T> CUDA_CALLABLE T texture_sample(const texture3d_t& tex, flo
     return texture_sample_helper<T>::sample_3d(tex, u, v, w);
 }
 
-// Adjoint stubs for texture sampling (non-differentiable for now)
+// Adjoints for texture sampling w.r.t. the sampling coordinates.
+// Gradients w.r.t. texture data are not supported; adj_tex is never written.
+// filter_mode and use_normalized_coords are read from the descriptor (CLOSEST yields a zero gradient).
+// Boundary behavior matches PyTorch grid_sample with padding_mode="border":
+// an axis contributes no gradient when its two-texel footprint extends past the texture.
 template <typename T>
 CUDA_CALLABLE void
 adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& adj_u, const T& adj_ret)
 {
-    // Texture sampling is not differentiable in this implementation
+    if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)  // CLOSEST: no coordinate gradient is accumulated
+        return;
+
+    float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;  // d(texel coord)/du
+
+#if defined(__CUDA_ARCH__)
+    float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f;  // texel-space coordinate
+    int x0 = (int)floor(raw_tx);  // left neighbor texel
+    int x1 = x0 + 1;  // right neighbor texel
+
+    if (x0 >= 0 && x1 < tex.width) {  // both neighbors must lie inside the texture
+        float u0 = tex.use_normalized_coords ? ((float)x0 + 0.5f) / (float)tex.width : (float)x0 + 0.5f;
+        float u1 = tex.use_normalized_coords ? ((float)x1 + 0.5f) / (float)tex.width : (float)x1 + 0.5f;
+        float gtx = 0.0f;  // channel loop is capped at T's float count: adj_ret is read as float[]
+        for (int c = 0; c < tex.num_channels && c < (int)(sizeof(T) / sizeof(float)); c++)
+            gtx += (texture_sample_helper<T>::fetch_1d(tex.tex, u1, c)
+                    - texture_sample_helper<T>::fetch_1d(tex.tex, u0, c))
+                * ((const float*)&adj_ret)[c];
+        adj_u += gtx_mult * gtx;  // chain rule: texel-space slope times d(texel coord)/du
+    }
+#else
+    if (tex.tex == 0)  // null handle: nothing to differentiate
+        return;
+    const Texture* cpu_tex = (const Texture*)tex.tex;  // on CPU the handle is a Texture pointer
+
+    float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width);
+    float raw_tx = coord_u * (float)cpu_tex->width - 0.5f;  // texel-space coordinate
+    float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u);
+
+    int x0_raw = (int)floor(raw_tx);  // neighbors from the raw coordinate, used only for the bounds test
+    int x1_raw = x0_raw + 1;
+    int x0 = (int)floor(tx);  // neighbors from the address-mode-adjusted coordinate
+    int x1 = x0 + 1;
+
+    if (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width)) {
+        int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u);
+        int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u);
+        float gtx = 0.0f;  // channel loop is capped at T's float count: adj_ret is read as float[]
+        for (int c = 0; c < cpu_tex->num_channels && c < (int)(sizeof(T) / sizeof(float)); c++)
+            gtx += (cpu_fetch_texel_1d(cpu_tex, x1w, c) - cpu_fetch_texel_1d(cpu_tex, x0w, c))
+                * ((const float*)&adj_ret)[c];
+        adj_u += gtx_mult * gtx;  // chain rule: texel-space slope times d(texel coord)/du
+    }
+#endif
 }
 
 template <typename T>
 CUDA_CALLABLE void
 adj_texture_sample(const texture2d_t& tex, const vec2f& uv, texture2d_t& adj_tex, vec2f& adj_uv, const T& adj_ret)
 {
-    // Texture sampling is not differentiable in this implementation
+    adj_texture_sample(tex, uv[0], uv[1], adj_tex, adj_uv[0], adj_uv[1], adj_ret);
 }
 
 template <typename T>
@@ -805,14 +924,98 @@ CUDA_CALLABLE void adj_texture_sample(
     const texture2d_t& tex, float u, float v, texture2d_t& adj_tex, float& adj_u, float& adj_v, const T& adj_ret
 )
 {
-    // Texture sampling is not differentiable in this implementation
+    if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)  // CLOSEST: no coordinate gradient is accumulated
+        return;
+
+    float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;  // d(texel x)/du
+    float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;  // d(texel y)/dv
+
+#if defined(__CUDA_ARCH__)
+    float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f;
+    float raw_ty = tex.use_normalized_coords ? v * (float)tex.height - 0.5f : v - 0.5f;
+    int x0 = (int)floor(raw_tx);  // lower neighbor texel on each axis
+    int x1 = x0 + 1;
+    int y0 = (int)floor(raw_ty);
+    int y1 = y0 + 1;
+    float fx = raw_tx - (float)x0;  // interpolation weights toward x1 / y1
+    float fy = raw_ty - (float)y0;
+
+    bool x_ok = (x0 >= 0 && x1 < tex.width);  // both x-neighbors inside the texture
+    bool y_ok = (y0 >= 0 && y1 < tex.height);  // both y-neighbors inside the texture
+
+    auto fetch = [&](int x, int y, int c) -> float {  // sample channel c at the center of texel (x, y)
+        float uf = tex.use_normalized_coords ? ((float)x + 0.5f) / (float)tex.width : (float)x + 0.5f;
+        float vf = tex.use_normalized_coords ? ((float)y + 0.5f) / (float)tex.height : (float)y + 0.5f;
+        return texture_sample_helper<T>::fetch_2d(tex.tex, uf, vf, c);
+    };
+
+    float gtx = 0.0f, gty = 0.0f;  // channel loop is capped at T's float count: adj_ret is read as float[]
+    for (int c = 0; c < tex.num_channels && c < (int)(sizeof(T) / sizeof(float)); c++) {
+        float gOut = ((const float*)&adj_ret)[c];
+        float v00 = fetch(x0, y0, c);
+        float v10 = fetch(x1, y0, c);
+        float v01 = fetch(x0, y1, c);
+        float v11 = fetch(x1, y1, c);
+        if (x_ok)  // x-slope of the bilinear interpolant
+            gtx += ((v10 - v00) * (1.0f - fy) + (v11 - v01) * fy) * gOut;
+        if (y_ok)  // y-slope of the bilinear interpolant
+            gty += ((v01 - v00) * (1.0f - fx) + (v11 - v10) * fx) * gOut;
+    }
+    adj_u += gtx_mult * gtx;  // chain rule back to the caller's coordinate units
+    adj_v += gty_mult * gty;
+#else
+    if (tex.tex == 0)  // null handle: nothing to differentiate
+        return;
+    const Texture* cpu_tex = (const Texture*)tex.tex;  // on CPU the handle is a Texture pointer
+
+    float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width);
+    float coord_v = cpu_tex->use_normalized_coords ? v : (v / (float)cpu_tex->height);
+    float raw_tx = coord_u * (float)cpu_tex->width - 0.5f;
+    float raw_ty = coord_v * (float)cpu_tex->height - 0.5f;
+    float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u);
+    float ty = cpu_apply_address_mode_1d(coord_v, cpu_tex->height, cpu_tex->address_mode_v);
+
+    int x0_raw = (int)floor(raw_tx);  // neighbors from the raw coordinates, used only for the bounds tests
+    int x1_raw = x0_raw + 1;
+    int y0_raw = (int)floor(raw_ty);
+    int y1_raw = y0_raw + 1;
+    int x0 = (int)floor(tx);  // neighbors from the address-mode-adjusted coordinates
+    int x1 = x0 + 1;
+    int y0 = (int)floor(ty);
+    int y1 = y0 + 1;
+    float fx = tx - (float)x0;  // interpolation weights toward x1 / y1
+    float fy = ty - (float)y0;
+
+    bool x_ok = (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width));
+    bool y_ok = (cpu_in_bounds_1d(y0_raw, cpu_tex->height) && cpu_in_bounds_1d(y1_raw, cpu_tex->height));
+
+    int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u);
+    int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u);
+    int y0w = cpu_apply_address_mode_index(y0, cpu_tex->height, cpu_tex->address_mode_v);
+    int y1w = cpu_apply_address_mode_index(y1, cpu_tex->height, cpu_tex->address_mode_v);
+
+    float gtx = 0.0f, gty = 0.0f;  // channel loop is capped at T's float count: adj_ret is read as float[]
+    for (int c = 0; c < cpu_tex->num_channels && c < (int)(sizeof(T) / sizeof(float)); c++) {
+        float gOut = ((const float*)&adj_ret)[c];
+        float v00 = cpu_fetch_texel_2d(cpu_tex, x0w, y0w, c);
+        float v10 = cpu_fetch_texel_2d(cpu_tex, x1w, y0w, c);
+        float v01 = cpu_fetch_texel_2d(cpu_tex, x0w, y1w, c);
+        float v11 = cpu_fetch_texel_2d(cpu_tex, x1w, y1w, c);
+        if (x_ok)  // x-slope of the bilinear interpolant
+            gtx += ((v10 - v00) * (1.0f - fy) + (v11 - v01) * fy) * gOut;
+        if (y_ok)  // y-slope of the bilinear interpolant
+            gty += ((v01 - v00) * (1.0f - fx) + (v11 - v10) * fx) * gOut;
+    }
+    adj_u += gtx_mult * gtx;  // chain rule back to the caller's coordinate units
+    adj_v += gty_mult * gty;
+#endif
 }
 
 template <typename T>
 CUDA_CALLABLE void
 adj_texture_sample(const texture3d_t& tex, const vec3f& uvw, texture3d_t& adj_tex, vec3f& adj_uvw, const T& adj_ret)
 {
-    // Texture sampling is not differentiable in this implementation
+    adj_texture_sample(tex, uvw[0], uvw[1], uvw[2], adj_tex, adj_uvw[0], adj_uvw[1], adj_uvw[2], adj_ret);
 }
 
 template <typename T>
@@ -828,7 +1031,135 @@ CUDA_CALLABLE void adj_texture_sample(
     const T& adj_ret
 )
 {
-    // Texture sampling is not differentiable in this implementation
+    if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)  // CLOSEST: no coordinate gradient is accumulated
+        return;
+
+    float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;  // d(texel x)/du
+    float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;  // d(texel y)/dv
+    float gtz_mult = tex.use_normalized_coords ? (float)tex.depth : 1.0f;  // d(texel z)/dw
+
+#if defined(__CUDA_ARCH__)
+    float raw_tx = tex.use_normalized_coords ? u * (float)tex.width - 0.5f : u - 0.5f;
+    float raw_ty = tex.use_normalized_coords ? v * (float)tex.height - 0.5f : v - 0.5f;
+    float raw_tz = tex.use_normalized_coords ? w * (float)tex.depth - 0.5f : w - 0.5f;
+    int x0 = (int)floor(raw_tx);  // lower neighbor texel on each axis
+    int x1 = x0 + 1;
+    int y0 = (int)floor(raw_ty);
+    int y1 = y0 + 1;
+    int z0 = (int)floor(raw_tz);
+    int z1 = z0 + 1;
+    float fx = raw_tx - (float)x0;  // interpolation weights toward x1 / y1 / z1
+    float fy = raw_ty - (float)y0;
+    float fz = raw_tz - (float)z0;
+
+    bool x_ok = (x0 >= 0 && x1 < tex.width);  // both neighbors on the axis inside the texture
+    bool y_ok = (y0 >= 0 && y1 < tex.height);
+    bool z_ok = (z0 >= 0 && z1 < tex.depth);
+
+    auto fetch = [&](int x, int y, int z, int c) -> float {  // sample channel c at the center of texel (x, y, z)
+        float uf = tex.use_normalized_coords ? ((float)x + 0.5f) / (float)tex.width : (float)x + 0.5f;
+        float vf = tex.use_normalized_coords ? ((float)y + 0.5f) / (float)tex.height : (float)y + 0.5f;
+        float wf = tex.use_normalized_coords ? ((float)z + 0.5f) / (float)tex.depth : (float)z + 0.5f;
+        return texture_sample_helper<T>::fetch_3d(tex.tex, uf, vf, wf, c);
+    };
+
+    float gtx = 0.0f, gty = 0.0f, gtz = 0.0f;  // channel loop is capped at T's float count (adj_ret read as float[])
+    for (int c = 0; c < tex.num_channels && c < (int)(sizeof(T) / sizeof(float)); c++) {
+        float gOut = ((const float*)&adj_ret)[c];
+        float v000 = fetch(x0, y0, z0, c);
+        float v100 = fetch(x1, y0, z0, c);
+        float v010 = fetch(x0, y1, z0, c);
+        float v110 = fetch(x1, y1, z0, c);
+        float v001 = fetch(x0, y0, z1, c);
+        float v101 = fetch(x1, y0, z1, c);
+        float v011 = fetch(x0, y1, z1, c);
+        float v111 = fetch(x1, y1, z1, c);
+        if (x_ok)  // x-slope of the trilinear interpolant
+            gtx += (((v100 - v000) * (1.0f - fy) + (v110 - v010) * fy) * (1.0f - fz)
+                    + ((v101 - v001) * (1.0f - fy) + (v111 - v011) * fy) * fz)
+                * gOut;
+        if (y_ok)  // y-slope of the trilinear interpolant
+            gty += (((v010 - v000) * (1.0f - fx) + (v110 - v100) * fx) * (1.0f - fz)
+                    + ((v011 - v001) * (1.0f - fx) + (v111 - v101) * fx) * fz)
+                * gOut;
+        if (z_ok)  // z-slope of the trilinear interpolant
+            gtz += (((v001 - v000) * (1.0f - fx) + (v101 - v100) * fx) * (1.0f - fy)
+                    + ((v011 - v010) * (1.0f - fx) + (v111 - v110) * fx) * fy)
+                * gOut;
+    }
+    adj_u += gtx_mult * gtx;  // chain rule back to the caller's coordinate units
+    adj_v += gty_mult * gty;
+    adj_w += gtz_mult * gtz;
+#else
+    if (tex.tex == 0)  // null handle: nothing to differentiate
+        return;
+    const Texture* cpu_tex = (const Texture*)tex.tex;  // on CPU the handle is a Texture pointer
+
+    float coord_u = cpu_tex->use_normalized_coords ? u : (u / (float)cpu_tex->width);
+    float coord_v = cpu_tex->use_normalized_coords ? v : (v / (float)cpu_tex->height);
+    float coord_w = cpu_tex->use_normalized_coords ? w : (w / (float)cpu_tex->depth);
+    float raw_tx = coord_u * (float)cpu_tex->width - 0.5f;
+    float raw_ty = coord_v * (float)cpu_tex->height - 0.5f;
+    float raw_tz = coord_w * (float)cpu_tex->depth - 0.5f;
+    float tx = cpu_apply_address_mode_1d(coord_u, cpu_tex->width, cpu_tex->address_mode_u);
+    float ty = cpu_apply_address_mode_1d(coord_v, cpu_tex->height, cpu_tex->address_mode_v);
+    float tz = cpu_apply_address_mode_1d(coord_w, cpu_tex->depth, cpu_tex->address_mode_w);
+
+    int x0_raw = (int)floor(raw_tx);  // neighbors from the raw coordinates, used only for the bounds tests
+    int x1_raw = x0_raw + 1;
+    int y0_raw = (int)floor(raw_ty);
+    int y1_raw = y0_raw + 1;
+    int z0_raw = (int)floor(raw_tz);
+    int z1_raw = z0_raw + 1;
+    int x0 = (int)floor(tx);  // neighbors from the address-mode-adjusted coordinates
+    int x1 = x0 + 1;
+    int y0 = (int)floor(ty);
+    int y1 = y0 + 1;
+    int z0 = (int)floor(tz);
+    int z1 = z0 + 1;
+    float fx = tx - (float)x0;  // interpolation weights toward x1 / y1 / z1
+    float fy = ty - (float)y0;
+    float fz = tz - (float)z0;
+
+    bool x_ok = (cpu_in_bounds_1d(x0_raw, cpu_tex->width) && cpu_in_bounds_1d(x1_raw, cpu_tex->width));
+    bool y_ok = (cpu_in_bounds_1d(y0_raw, cpu_tex->height) && cpu_in_bounds_1d(y1_raw, cpu_tex->height));
+    bool z_ok = (cpu_in_bounds_1d(z0_raw, cpu_tex->depth) && cpu_in_bounds_1d(z1_raw, cpu_tex->depth));
+
+    int x0w = cpu_apply_address_mode_index(x0, cpu_tex->width, cpu_tex->address_mode_u);
+    int x1w = cpu_apply_address_mode_index(x1, cpu_tex->width, cpu_tex->address_mode_u);
+    int y0w = cpu_apply_address_mode_index(y0, cpu_tex->height, cpu_tex->address_mode_v);
+    int y1w = cpu_apply_address_mode_index(y1, cpu_tex->height, cpu_tex->address_mode_v);
+    int z0w = cpu_apply_address_mode_index(z0, cpu_tex->depth, cpu_tex->address_mode_w);
+    int z1w = cpu_apply_address_mode_index(z1, cpu_tex->depth, cpu_tex->address_mode_w);
+
+    float gtx = 0.0f, gty = 0.0f, gtz = 0.0f;  // channel loop is capped at T's float count (adj_ret read as float[])
+    for (int c = 0; c < cpu_tex->num_channels && c < (int)(sizeof(T) / sizeof(float)); c++) {
+        float gOut = ((const float*)&adj_ret)[c];
+        float v000 = cpu_fetch_texel_3d(cpu_tex, x0w, y0w, z0w, c);
+        float v100 = cpu_fetch_texel_3d(cpu_tex, x1w, y0w, z0w, c);
+        float v010 = cpu_fetch_texel_3d(cpu_tex, x0w, y1w, z0w, c);
+        float v110 = cpu_fetch_texel_3d(cpu_tex, x1w, y1w, z0w, c);
+        float v001 = cpu_fetch_texel_3d(cpu_tex, x0w, y0w, z1w, c);
+        float v101 = cpu_fetch_texel_3d(cpu_tex, x1w, y0w, z1w, c);
+        float v011 = cpu_fetch_texel_3d(cpu_tex, x0w, y1w, z1w, c);
+        float v111 = cpu_fetch_texel_3d(cpu_tex, x1w, y1w, z1w, c);
+        if (x_ok)  // x-slope of the trilinear interpolant
+            gtx += (((v100 - v000) * (1.0f - fy) + (v110 - v010) * fy) * (1.0f - fz)
+                    + ((v101 - v001) * (1.0f - fy) + (v111 - v011) * fy) * fz)
+                * gOut;
+        if (y_ok)  // y-slope of the trilinear interpolant
+            gty += (((v010 - v000) * (1.0f - fx) + (v110 - v100) * fx) * (1.0f - fz)
+                    + ((v011 - v001) * (1.0f - fx) + (v111 - v101) * fx) * fz)
+                * gOut;
+        if (z_ok)  // z-slope of the trilinear interpolant
+            gtz += (((v001 - v000) * (1.0f - fx) + (v101 - v100) * fx) * (1.0f - fy)
+                    + ((v011 - v010) * (1.0f - fx) + (v111 - v110) * fx) * fy)
+                * gOut;
+    }
+    adj_u += gtx_mult * gtx;  // chain rule back to the caller's coordinate units
+    adj_v += gty_mult * gty;
+    adj_w += gtz_mult * gtz;
+#endif
 }
 
 // Type aliases for code generation
diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py
index 2eb6ecedff..d686ae10cd 100644
--- a/warp/tests/cuda/test_texture.py
+++ b/warp/tests/cuda/test_texture.py
@@ -2600,6 +2600,330 @@ def test_texture3d_array(test, device):
     np.testing.assert_allclose(result, expected, rtol=1e-5, atol=1e-5)
 
 
+# ============================================================================
+# Adjoint tests
+# ============================================================================
+
+
+@wp.kernel
+def sample_1d(tex: wp.Texture1D, pos: wp.array(dtype=float), out: wp.array(dtype=float)):
+    tid = wp.tid()
+    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)
+
+
+@wp.kernel
+def sample_2d(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=float)):
+    tid = wp.tid()
+    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)
+
+
+@wp.kernel
+def sample_3d(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=float)):
+    tid = wp.tid()
+    out[tid] = wp.texture_sample(tex, pos[tid], dtype=float)
+
+
+def _grad_1d(data, u, device):
+    tex = wp.Texture1D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([u], dtype=float, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
+def _grad_2d(data, coord, device):
+    tex = wp.Texture2D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([wp.vec2f(*coord)], dtype=wp.vec2f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_2d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
+def _grad_3d(data, coord, device):
+    tex = wp.Texture3D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([wp.vec3f(*coord)], dtype=wp.vec3f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_3d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
+def test_texture1d_adj_boundary_zero(test, device):
+    """Gradient is zero when sampling position straddles the near boundary."""
+    data = np.random.default_rng(0).standard_normal(16).astype(np.float32)
+    np.testing.assert_allclose(_grad_1d(data, 0.1, device), 0.0, atol=1e-6)
+
+
+def test_texture1d_adj_far_boundary_zero(test, device):
+    """Gradient is zero when sampling position straddles the far boundary."""
+    data = np.random.default_rng(1).standard_normal(16).astype(np.float32)
+    np.testing.assert_allclose(_grad_1d(data, 15.9, device), 0.0, atol=1e-6)
+
+
+def test_texture1d_adj_closest_zero(test, device):
+    """Gradient is zero for CLOSEST filter mode."""
+    data = np.random.default_rng(2).standard_normal(16).astype(np.float32)
+    tex = wp.Texture1D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.CLOSEST,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([7.3], dtype=float, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    np.testing.assert_allclose(pos.grad.numpy()[0], 0.0, atol=1e-6)
+
+
+def test_texture1d_adj_linear_signal(test, device):
+    """Gradient of a linear signal is constant and analytically known."""
+    W = 16
+    # value = x / W, so d(value)/d(u) = 1/W
+    data = np.arange(W, dtype=np.float32) / W
+    g = _grad_1d(data, 7.3, device)
+    np.testing.assert_allclose(g, 1.0 / W, atol=1e-5)
+
+
+def test_texture2d_adj_near_boundary_zero(test, device):
+    """2D gradient is zero when straddling the near boundary in both axes."""
+    data = np.random.default_rng(3).standard_normal((8, 10)).astype(np.float32)
+    g = _grad_2d(data, (0.1, 0.1), device)
+    np.testing.assert_allclose(g, [0.0, 0.0], atol=1e-6)
+
+
+def test_texture2d_adj_far_boundary_zero(test, device):
+    """2D gradient is zero when straddling the far boundary."""
+    data = np.random.default_rng(4).standard_normal((8, 10)).astype(np.float32)
+    H, W = data.shape
+    g = _grad_2d(data, (W - 0.1, H - 0.1), device)
+    np.testing.assert_allclose(g, [0.0, 0.0], atol=1e-6)
+
+
+def test_texture2d_adj_partial_boundary(test, device):
+    """2D gradient: x interior but y straddling boundary — only y grad is zero."""
+    data = np.random.default_rng(5).standard_normal((8, 10)).astype(np.float32)
+    g = _grad_2d(data, (3.7, 0.1), device)
+    # x is interior so gradient should be nonzero; y straddles boundary so zero
+    test.assertNotEqual(g[0], 0.0)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-6)
+
+
+def test_texture2d_adj_linear_x(test, device):
+    """2D gradient of signal linear in x: x-grad is 1/W, y-grad is zero."""
+    H, W = 6, 10
+    data = np.zeros((H, W), dtype=np.float32)
+    for x in range(W):
+        data[:, x] = x / W
+    g = _grad_2d(data, (4.5, 3.0), device)
+    np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+
+
+def test_texture2d_adj_linear_y(test, device):
+    """2D gradient of signal linear in y: y-grad is 1/H, x-grad is zero."""
+    H, W = 6, 10
+    data = np.zeros((H, W), dtype=np.float32)
+    for y in range(H):
+        data[y, :] = y / H
+    g = _grad_2d(data, (4.5, 3.0), device)
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 1.0 / H, atol=1e-5)
+
+
+def test_texture3d_adj_near_boundary_zero(test, device):
+    """3D gradient is zero when straddling the near boundary in all axes."""
+    data = np.random.default_rng(6).standard_normal((8, 6, 10)).astype(np.float32)
+    g = _grad_3d(data, (0.1, 0.1, 0.1), device)
+    np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6)
+
+
+def test_texture3d_adj_far_boundary_zero(test, device):
+    """3D gradient is zero when straddling the far boundary."""
+    data = np.random.default_rng(7).standard_normal((8, 6, 10)).astype(np.float32)
+    D, H, W = data.shape
+    g = _grad_3d(data, (W - 0.1, H - 0.1, D - 0.1), device)
+    np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6)
+
+
+def test_texture3d_adj_partial_boundary(test, device):
+    """3D gradient: x and y interior, z straddling boundary — only z grad is zero."""
+    data = np.random.default_rng(8).standard_normal((8, 6, 10)).astype(np.float32)
+    g = _grad_3d(data, (2.3, 3.7, 0.1), device)
+    test.assertNotEqual(g[0], 0.0)
+    test.assertNotEqual(g[1], 0.0)
+    np.testing.assert_allclose(g[2], 0.0, atol=1e-6)
+
+
+def test_texture3d_adj_uniform_zero(test, device):
+    """Gradient of a uniform volume is zero (no spatial variation to differentiate)."""
+    data = np.ones((8, 6, 10), dtype=np.float32)
+    g = _grad_3d(data, (2.3, 3.7, 1.1), device)
+    np.testing.assert_allclose(g, [0.0, 0.0, 0.0], atol=1e-6)
+
+
+def test_texture3d_adj_linear_x(test, device):
+    """3D gradient of signal linear in x: x-grad is 1/W, y and z grads are zero."""
+    D, H, W = 8, 6, 10
+    data = np.zeros((D, H, W), dtype=np.float32)
+    for x in range(W):
+        data[:, :, x] = x / W
+    g = _grad_3d(data, (4.5, 3.0, 3.0), device)
+    np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[2], 0.0, atol=1e-5)
+
+
+def test_texture3d_adj_linear_y(test, device):
+    """3D gradient of signal linear in y: y-grad is 1/H, x and z grads are zero."""
+    D, H, W = 8, 6, 10
+    data = np.zeros((D, H, W), dtype=np.float32)
+    for y in range(H):
+        data[:, y, :] = y / H
+    g = _grad_3d(data, (4.5, 3.0, 3.0), device)
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 1.0 / H, atol=1e-5)
+    np.testing.assert_allclose(g[2], 0.0, atol=1e-5)
+
+
+def test_texture3d_adj_linear_z(test, device):
+    """3D gradient of signal linear in z: z-grad is 1/D, x and y grads are zero."""
+    D, H, W = 8, 6, 10
+    data = np.zeros((D, H, W), dtype=np.float32)
+    for z in range(D):
+        data[z, :, :] = z / D
+    g = _grad_3d(data, (4.5, 3.0, 3.0), device)
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5)
+
+
+def test_texture2d_adj_vec2f_linear_x(test, device):
+    """2D vec2f texture: opposite-slope channels with equal seeds cancel to a zero x-grad."""
+    H, W = 6, 10
+    data = np.zeros((H, W, 2), dtype=np.float32)
+    for x in range(W):
+        data[:, x, 0] = x / W          # channel 0: linear in x
+        data[:, x, 1] = (W - 1 - x) / W  # channel 1: linear in x, reversed
+
+    tex = wp.Texture2D(data, normalized_coords=False,
+                       filter_mode=wp.TextureFilterMode.LINEAR,
+                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+
+    @wp.kernel
+    def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
+        tid = wp.tid()
+        out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)
+
+    pos = wp.array([wp.vec2f(4.5, 3.0)], dtype=wp.vec2f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_2d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    # seed gradient: both channels contribute equally
+    out.grad = wp.array([wp.vec2f(1.0, 1.0)], dtype=wp.vec2f, device=device)
+    tape.backward()
+
+    g = pos.grad.numpy()[0]
+    # d(ch0)/dx = 1/W, d(ch1)/dx = -1/W, sum = 0
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+
+
+def test_texture2d_adj_vec2f_channels_independent(test, device):
+    """2D vec2f texture: seeding only channel 0 gives channel-0-only gradient."""
+    H, W = 6, 10
+    data = np.zeros((H, W, 2), dtype=np.float32)
+    for x in range(W):
+        data[:, x, 0] = x / W   # channel 0: linear in x
+        data[:, x, 1] = 0.0     # channel 1: constant
+
+    tex = wp.Texture2D(data, normalized_coords=False,
+                       filter_mode=wp.TextureFilterMode.LINEAR,
+                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+
+    @wp.kernel
+    def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
+        tid = wp.tid()
+        out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)
+
+    pos = wp.array([wp.vec2f(4.5, 3.0)], dtype=wp.vec2f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_2d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    # seed only channel 0
+    out.grad = wp.array([wp.vec2f(1.0, 0.0)], dtype=wp.vec2f, device=device)
+    tape.backward()
+
+    g = pos.grad.numpy()[0]
+    np.testing.assert_allclose(g[0], 1.0 / W, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+
+
+def test_texture3d_adj_vec2f_linear_z(test, device):
+    """3D vec2f texture: z-grad is 1/D when only channel 0 is linear in z."""
+    D, H, W = 8, 6, 10
+    data = np.zeros((D, H, W, 2), dtype=np.float32)
+    for z in range(D):
+        data[z, :, :, 0] = z / D   # channel 0: linear in z
+        data[z, :, :, 1] = 0.0     # channel 1: constant
+
+    tex = wp.Texture3D(data, normalized_coords=False,
+                       filter_mode=wp.TextureFilterMode.LINEAR,
+                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+
+    @wp.kernel
+    def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=wp.vec2f)):
+        tid = wp.tid()
+        out[tid] = wp.texture_sample(tex, pos[tid], dtype=wp.vec2f)
+
+    pos = wp.array([wp.vec3f(4.5, 3.0, 3.0)], dtype=wp.vec3f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=wp.vec2f, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_3d_vec2, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.array([wp.vec2f(1.0, 0.0)], dtype=wp.vec2f, device=device)
+    tape.backward()
+
+    g = pos.grad.numpy()[0]
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5)
+
 # ============================================================================
 # Test Class
 # ============================================================================
@@ -2853,6 +3177,45 @@ class TestTexture(unittest.TestCase):
     TestTexture, "test_texture_struct_both_members", test_texture_struct_both_members, devices=all_devices
 )
 
+# Adjoint
+add_function_test(
+    TestTexture, "test_texture1d_adj_boundary_zero", test_texture1d_adj_boundary_zero, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture1d_adj_far_boundary_zero", test_texture1d_adj_far_boundary_zero, devices=all_devices
+)
+add_function_test(TestTexture, "test_texture1d_adj_closest_zero", test_texture1d_adj_closest_zero, devices=all_devices)
+add_function_test(
+    TestTexture, "test_texture1d_adj_linear_signal", test_texture1d_adj_linear_signal, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture2d_adj_near_boundary_zero", test_texture2d_adj_near_boundary_zero, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture2d_adj_far_boundary_zero", test_texture2d_adj_far_boundary_zero, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture2d_adj_partial_boundary", test_texture2d_adj_partial_boundary, devices=all_devices
+)
+add_function_test(TestTexture, "test_texture2d_adj_linear_x", test_texture2d_adj_linear_x, devices=all_devices)
+add_function_test(TestTexture, "test_texture2d_adj_linear_y", test_texture2d_adj_linear_y, devices=all_devices)
+add_function_test(
+    TestTexture, "test_texture3d_adj_near_boundary_zero", test_texture3d_adj_near_boundary_zero, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture3d_adj_far_boundary_zero", test_texture3d_adj_far_boundary_zero, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture3d_adj_partial_boundary", test_texture3d_adj_partial_boundary, devices=all_devices
+)
+add_function_test(TestTexture, "test_texture3d_adj_uniform_zero", test_texture3d_adj_uniform_zero, devices=all_devices)
+add_function_test(TestTexture, "test_texture3d_adj_linear_x", test_texture3d_adj_linear_x, devices=all_devices)
+add_function_test(TestTexture, "test_texture3d_adj_linear_y", test_texture3d_adj_linear_y, devices=all_devices)
+add_function_test(TestTexture, "test_texture3d_adj_linear_z", test_texture3d_adj_linear_z, devices=all_devices)
+add_function_test(TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices)
+add_function_test(TestTexture, "test_texture2d_adj_vec2f_channels_independent", test_texture2d_adj_vec2f_channels_independent, devices=all_devices)
+add_function_test(TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)

From 160515f996037b7d196a1ee5a05f94df6bbec096 Mon Sep 17 00:00:00 2001
From: Eric Shi <ershi@nvidia.com>
Date: Wed, 18 Mar 2026 13:44:53 -0700
Subject: [PATCH 2/5] Update publications list with two new papers

Add Kamino (GPU multi-body simulation) and ComFree-Sim
(GPU-parallelized contact physics engine) to the 2026 publications.

Signed-off-by: Eric Shi <ershi@nvidia.com>
Signed-off-by: eigenvivek <vivekg@mit.edu>
---
 PUBLICATIONS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md
index d69ffd1502..885a8f953f 100644
--- a/PUBLICATIONS.md
+++ b/PUBLICATIONS.md
@@ -8,6 +8,8 @@ pull request on GitHub or email a link to your arXiv preprint (preferred) or DOI
 ## 2026
 
 - **Discovering neural cohesive zone laws from displacement fields**. *G. Barkoulis Gavris, W. Sun*. April 2026. [DOI:10.1016/j.cma.2026.118733](https://doi.org/10.1016/j.cma.2026.118733)
+- **Kamino: GPU-based Massively Parallel Simulation of Multi-Body Systems with Challenging Topologies**. *V. Tsounis, G. Maloisel, C. Schumacher, R. Grandia, A. Serifi, D. Müller, C. Amevor, T. Widmer, M. Bächer*. March 2026. [arXiv:2603.16536](https://arxiv.org/abs/2603.16536)
+- **ComFree-Sim: A GPU-Parallelized Analytical Contact Physics Engine for Scalable Contact-Rich Robotics Simulation and Control**. *C. Borse, Z. Xie, W. Huang, W. Jin*. March 2026. [arXiv:2603.12185](https://arxiv.org/abs/2603.12185)
 - **cuRoboV2: Dynamics-Aware Motion Generation with Depth-Fused Distance Fields for High-DoF Robots**. *B. Sundaralingam, A. Murali, S. Birchfield*. March 2026. [arXiv:2603.05493](https://arxiv.org/abs/2603.05493)
 - **GaussTwin: Unified Simulation and Correction with Gaussian Splatting for Robotic Digital Twins**. *Y. Cai, P. Jansonnie, C. de Farias, O. Arenz, J. Peters*. March 2026. [arXiv:2603.05108](https://arxiv.org/abs/2603.05108)
 - **X-Loco: Towards Generalist Humanoid Locomotion Control via Synergetic Policy Distillation**. *D. Wang, X. Wang, C. Zhang, J. Shi, Y. Zhao, C. Bai, X. Li*. March 2026. [arXiv:2603.03733](https://arxiv.org/abs/2603.03733)

From d9a2f2316cea5db5fb2f0ce839ab22dc6b9ba157 Mon Sep 17 00:00:00 2001
From: eigenvivek <vivekg@mit.edu>
Date: Wed, 18 Mar 2026 17:49:10 -0400
Subject: [PATCH 3/5] Restore two bool-vector builtin registrations that were accidentally deleted

Signed-off-by: eigenvivek <vivekg@mit.edu>
---
 warp/_src/builtins.py           | 22 +++++++++++++
 warp/tests/cuda/test_texture.py | 56 +++++++++++++++++++++++----------
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/warp/_src/builtins.py b/warp/_src/builtins.py
index c1d783efa5..001501831c 100644
--- a/warp/_src/builtins.py
+++ b/warp/_src/builtins.py
@@ -9828,6 +9828,17 @@ def vector_assign_dispatch_func(input_types: Mapping[str, type], return_type: An
     group="Utility",
 )
 
+# Bool vector assign_inplace (bool is not part of Scalar)
+add_builtin(
+    "assign_inplace",
+    input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any},
+    value_type=None,
+    dispatch_func=vector_assign_dispatch_func,
+    hidden=True,
+    export=False,
+    group="Utility",
+)
+
 # implements quaternion[index] = value
 add_builtin(
     "assign_inplace",
@@ -9866,6 +9877,17 @@ def vector_assign_copy_value_func(arg_types: Mapping[str, type], arg_values: Map
     group="Utility",
 )
 
+# Bool vector assign_copy (bool is not part of Scalar)
+add_builtin(
+    "assign_copy",
+    input_types={"a": vector(length=Any, dtype=bool), "i": Any, "value": Any},
+    value_func=vector_assign_copy_value_func,
+    dispatch_func=vector_assign_dispatch_func,
+    hidden=True,
+    export=False,
+    group="Utility",
+)
+
 # implements quaternion[index] = value, performs a copy internally if wp.config.enable_vector_component_overwrites is True
 add_builtin(
     "assign_copy",
diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py
index d686ae10cd..b008a6eafc 100644
--- a/warp/tests/cuda/test_texture.py
+++ b/warp/tests/cuda/test_texture.py
@@ -2836,12 +2836,16 @@ def test_texture2d_adj_vec2f_linear_x(test, device):
     H, W = 6, 10
     data = np.zeros((H, W, 2), dtype=np.float32)
     for x in range(W):
-        data[:, x, 0] = x / W          # channel 0: linear in x
+        data[:, x, 0] = x / W  # channel 0: linear in x
         data[:, x, 1] = (W - 1 - x) / W  # channel 1: linear in x, reversed
 
-    tex = wp.Texture2D(data, normalized_coords=False,
-                       filter_mode=wp.TextureFilterMode.LINEAR,
-                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+    tex = wp.Texture2D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
 
     @wp.kernel
     def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
@@ -2868,12 +2872,16 @@ def test_texture2d_adj_vec2f_channels_independent(test, device):
     H, W = 6, 10
     data = np.zeros((H, W, 2), dtype=np.float32)
     for x in range(W):
-        data[:, x, 0] = x / W   # channel 0: linear in x
-        data[:, x, 1] = 0.0     # channel 1: constant
+        data[:, x, 0] = x / W  # channel 0: linear in x
+        data[:, x, 1] = 0.0  # channel 1: constant
 
-    tex = wp.Texture2D(data, normalized_coords=False,
-                       filter_mode=wp.TextureFilterMode.LINEAR,
-                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+    tex = wp.Texture2D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
 
     @wp.kernel
     def sample_2d_vec2(tex: wp.Texture2D, pos: wp.array(dtype=wp.vec2f), out: wp.array(dtype=wp.vec2f)):
@@ -2899,12 +2907,16 @@ def test_texture3d_adj_vec2f_linear_z(test, device):
     D, H, W = 8, 6, 10
     data = np.zeros((D, H, W, 2), dtype=np.float32)
     for z in range(D):
-        data[z, :, :, 0] = z / D   # channel 0: linear in z
-        data[z, :, :, 1] = 0.0     # channel 1: constant
+        data[z, :, :, 0] = z / D  # channel 0: linear in z
+        data[z, :, :, 1] = 0.0  # channel 1: constant
 
-    tex = wp.Texture3D(data, normalized_coords=False,
-                       filter_mode=wp.TextureFilterMode.LINEAR,
-                       address_mode=wp.TextureAddressMode.BORDER, device=device)
+    tex = wp.Texture3D(
+        data,
+        normalized_coords=False,
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
 
     @wp.kernel
     def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.array(dtype=wp.vec2f)):
@@ -2924,6 +2936,7 @@ def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.arr
     np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
     np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5)
 
+
 # ============================================================================
 # Test Class
 # ============================================================================
@@ -3212,9 +3225,18 @@ class TestTexture(unittest.TestCase):
 add_function_test(TestTexture, "test_texture3d_adj_linear_x", test_texture3d_adj_linear_x, devices=all_devices)
 add_function_test(TestTexture, "test_texture3d_adj_linear_y", test_texture3d_adj_linear_y, devices=all_devices)
 add_function_test(TestTexture, "test_texture3d_adj_linear_z", test_texture3d_adj_linear_z, devices=all_devices)
-add_function_test(TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices)
-add_function_test(TestTexture, "test_texture2d_adj_vec2f_channels_independent", test_texture2d_adj_vec2f_channels_independent, devices=all_devices)
-add_function_test(TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices)
+add_function_test(
+    TestTexture, "test_texture2d_adj_vec2f_linear_x", test_texture2d_adj_vec2f_linear_x, devices=all_devices
+)
+add_function_test(
+    TestTexture,
+    "test_texture2d_adj_vec2f_channels_independent",
+    test_texture2d_adj_vec2f_channels_independent,
+    devices=all_devices,
+)
+add_function_test(
+    TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices
+)
 
 
 if __name__ == "__main__":

From 057a3ebb810dc4c7782fae485fd1ac66d2d63087 Mon Sep 17 00:00:00 2001
From: eigenvivek <vivekg@mit.edu>
Date: Wed, 18 Mar 2026 20:28:04 -0400
Subject: [PATCH 4/5] Add warnings for non-border modes

Signed-off-by: eigenvivek <vivekg@mit.edu>
---
 warp/_src/texture.py  | 27 ++++++++++++++++
 warp/native/texture.h | 72 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/warp/_src/texture.py b/warp/_src/texture.py
index 59b8e1a32e..886dcaae14 100644
--- a/warp/_src/texture.py
+++ b/warp/_src/texture.py
@@ -7,6 +7,7 @@
 
 import ctypes
 import enum
+import warnings
 from typing import TYPE_CHECKING, ClassVar
 
 import numpy as np
@@ -176,6 +177,13 @@ class Texture:
     ``wp.float16``, and ``wp.float32`` data types. Unsigned integer textures are read as normalized
     floats in [0, 1]; signed integer textures are normalized to [-1, 1]; float types are returned as-is.
 
+    .. warning::
+        **Automatic differentiation is only correct when all texture address modes are set to BORDER.**
+        Using ``wp.texture_sample()`` with ``requires_grad=True`` on textures with WRAP, CLAMP, or
+        MIRROR address modes will produce silent gradient errors. The gradient computation zeros out
+        when sampling positions straddle texture boundaries, which is correct for BORDER mode but
+        incorrect for other modes where the forward pass returns valid interpolated data.
+
     This class should not be instantiated directly. A specific subclass should be used instead
     (:class:`Texture1D`, :class:`Texture2D`, or :class:`Texture3D`).
 
@@ -275,6 +283,25 @@ def __init__(
             self._resolve_address_mode(address_mode, address_mode_w, 2) if ndim > 2 else TextureAddressMode.CLAMP
         )
 
+        # Warn if using non-BORDER address modes (differentiation only supports BORDER)
+        non_border_modes = []
+        if address_mode_u != TextureAddressMode.BORDER:
+            non_border_modes.append(f"U={TextureAddressMode(address_mode_u).name}")
+        if ndim > 1 and address_mode_v != TextureAddressMode.BORDER:
+            non_border_modes.append(f"V={TextureAddressMode(address_mode_v).name}")
+        if ndim > 2 and address_mode_w != TextureAddressMode.BORDER:
+            non_border_modes.append(f"W={TextureAddressMode(address_mode_w).name}")
+
+        if non_border_modes:
+            warnings.warn(
+                f"Texture created with non-BORDER address mode(s): {', '.join(non_border_modes)}. "
+                f"Automatic differentiation (wp.texture_sample with requires_grad=True) only produces "
+                f"correct gradients when all address modes are BORDER. Non-BORDER modes will silently "
+                f"return incorrect gradients at texture boundaries.",
+                UserWarning,
+                stacklevel=2,
+            )
+
         # if an external CUDA array was given, infer texture shape and dtype from it
         if cuda_array:
             if not device.is_cuda:
diff --git a/warp/native/texture.h b/warp/native/texture.h
index ac49cecc79..e4f0ea2b73 100644
--- a/warp/native/texture.h
+++ b/warp/native/texture.h
@@ -857,11 +857,24 @@ template <typename T> CUDA_CALLABLE T texture_sample(const texture3d_t& tex, flo
     return texture_sample_helper<T>::sample_3d(tex, u, v, w);
 }
 
-// Adjoints for texture sampling w.r.t. sampling coordinates.
-// Gradients w.r.t. texture data are not supported; adj_tex is a no-op.
-// On GPU, requires filter_mode and use_normalized_coords in the descriptor.
-// Boundary behavior matches PyTorch grid_sample with padding_mode="border":
-// gradient is zero when the sampling position straddles a volume boundary.
+// ============================================================================
+// Texture Sampling Adjoints
+// ============================================================================
+//
+// IMPORTANT: Differentiation is only correct when all texture address modes
+// are set to BORDER (WP_TEXTURE_ADDRESS_BORDER = 3).
+//
+// The gradient computation zeros out when sampling positions straddle texture
+// boundaries, which is correct for BORDER mode (returns 0 outside bounds) but
+// incorrect for WRAP/MIRROR/CLAMP modes where the forward pass returns valid
+// interpolated data across boundaries.
+//
+// Using differentiation with WRAP (mode 0), CLAMP (mode 1), or MIRROR (mode 2)
+// will silently produce incorrect gradients without error or warning.
+//
+// Future work: Implement proper gradient computation for all address modes.
+// ============================================================================
+
 template <typename T>
 CUDA_CALLABLE void
 adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float& adj_u, const T& adj_ret)
@@ -869,6 +882,23 @@ adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float&
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
+#ifndef NDEBUG
+// Warning: This check is only active in debug builds
+// Differentiation is only correct for BORDER address mode
+#if !defined(__CUDA_ARCH__)
+    if (tex.tex != 0) {
+        const Texture* cpu_tex = (const Texture*)tex.tex;
+        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER) {
+            printf(
+                "WARNING: texture_sample adjoint may produce incorrect gradients. "
+                "Address mode is %d but differentiation only supports BORDER mode (3).\n",
+                cpu_tex->address_mode_u
+            );
+        }
+    }
+#endif
+#endif
+
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
 
 #if defined(__CUDA_ARCH__)
@@ -927,6 +957,22 @@ CUDA_CALLABLE void adj_texture_sample(
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
+#ifndef NDEBUG
+#if !defined(__CUDA_ARCH__)
+    if (tex.tex != 0) {
+        const Texture* cpu_tex = (const Texture*)tex.tex;
+        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER
+            || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER) {
+            printf(
+                "WARNING: texture_sample adjoint may produce incorrect gradients. "
+                "Address modes are (%d, %d) but differentiation only supports BORDER mode (3).\n",
+                cpu_tex->address_mode_u, cpu_tex->address_mode_v
+            );
+        }
+    }
+#endif
+#endif
+
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
     float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;
 
@@ -1034,6 +1080,22 @@ CUDA_CALLABLE void adj_texture_sample(
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
+#ifndef NDEBUG
+#if !defined(__CUDA_ARCH__)
+    if (tex.tex != 0) {
+        const Texture* cpu_tex = (const Texture*)tex.tex;
+        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER
+            || cpu_tex->address_mode_w != WP_TEXTURE_ADDRESS_BORDER) {
+            printf(
+                "WARNING: texture_sample adjoint may produce incorrect gradients. "
+                "Address modes are (%d, %d, %d) but differentiation only supports BORDER mode (3).\n",
+                cpu_tex->address_mode_u, cpu_tex->address_mode_v, cpu_tex->address_mode_w
+            );
+        }
+    }
+#endif
+#endif
+
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
     float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;
     float gtz_mult = tex.use_normalized_coords ? (float)tex.depth : 1.0f;

From 1d30e722659e66d8b70ac0ce1ae63141dfa337c6 Mon Sep 17 00:00:00 2001
From: eigenvivek <vivekg@mit.edu>
Date: Wed, 18 Mar 2026 21:41:39 -0400
Subject: [PATCH 5/5] Pass texture address modes to kernels and skip adjoints for non-BORDER modes

Signed-off-by: eigenvivek <vivekg@mit.edu>
---
 PUBLICATIONS.md                 |   2 -
 warp/_src/texture.py            |  83 ++++++++++++-------
 warp/native/texture.h           | 129 +++++++++++++++++++-----------
 warp/tests/cuda/test_texture.py | 137 ++++++++++++++++++++++++++++++++
 4 files changed, 275 insertions(+), 76 deletions(-)

diff --git a/PUBLICATIONS.md b/PUBLICATIONS.md
index 885a8f953f..d69ffd1502 100644
--- a/PUBLICATIONS.md
+++ b/PUBLICATIONS.md
@@ -8,8 +8,6 @@ pull request on GitHub or email a link to your arXiv preprint (preferred) or DOI
 ## 2026
 
 - **Discovering neural cohesive zone laws from displacement fields**. *G. Barkoulis Gavris, W. Sun*. April 2026. [DOI:10.1016/j.cma.2026.118733](https://doi.org/10.1016/j.cma.2026.118733)
-- **Kamino: GPU-based Massively Parallel Simulation of Multi-Body Systems with Challenging Topologies**. *V. Tsounis, G. Maloisel, C. Schumacher, R. Grandia, A. Serifi, D. Müller, C. Amevor, T. Widmer, M. Bächer*. March 2026. [arXiv:2603.16536](https://arxiv.org/abs/2603.16536)
-- **ComFree-Sim: A GPU-Parallelized Analytical Contact Physics Engine for Scalable Contact-Rich Robotics Simulation and Control**. *C. Borse, Z. Xie, W. Huang, W. Jin*. March 2026. [arXiv:2603.12185](https://arxiv.org/abs/2603.12185)
 - **cuRoboV2: Dynamics-Aware Motion Generation with Depth-Fused Distance Fields for High-DoF Robots**. *B. Sundaralingam, A. Murali, S. Birchfield*. March 2026. [arXiv:2603.05493](https://arxiv.org/abs/2603.05493)
 - **GaussTwin: Unified Simulation and Correction with Gaussian Splatting for Robotic Digital Twins**. *Y. Cai, P. Jansonnie, C. de Farias, O. Arenz, J. Peters*. March 2026. [arXiv:2603.05108](https://arxiv.org/abs/2603.05108)
 - **X-Loco: Towards Generalist Humanoid Locomotion Control via Synergetic Policy Distillation**. *D. Wang, X. Wang, C. Zhang, J. Shi, Y. Zhao, C. Bai, X. Li*. March 2026. [arXiv:2603.03733](https://arxiv.org/abs/2603.03733)
diff --git a/warp/_src/texture.py b/warp/_src/texture.py
index 886dcaae14..773d4ad483 100644
--- a/warp/_src/texture.py
+++ b/warp/_src/texture.py
@@ -7,7 +7,6 @@
 
 import ctypes
 import enum
-import warnings
 from typing import TYPE_CHECKING, ClassVar
 
 import numpy as np
@@ -83,14 +82,16 @@ class texture1d_t(ctypes.Structure):
         ("num_channels", ctypes.c_int32),
         ("filter_mode", ctypes.c_int32),
         ("use_normalized_coords", ctypes.c_int32),
+        ("address_mode_u", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
+    def __init__(self, tex=0, width=0, num_channels=0, filter_mode=0, use_normalized_coords=1, address_mode_u=0):
         self.tex = tex
         self.width = width
         self.num_channels = num_channels
         self.filter_mode = filter_mode
         self.use_normalized_coords = use_normalized_coords
+        self.address_mode_u = address_mode_u
 
 
 class texture2d_t(ctypes.Structure):
@@ -106,15 +107,29 @@ class texture2d_t(ctypes.Structure):
         ("num_channels", ctypes.c_int32),
         ("filter_mode", ctypes.c_int32),
         ("use_normalized_coords", ctypes.c_int32),
+        ("address_mode_u", ctypes.c_int32),
+        ("address_mode_v", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, height=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
+    def __init__(
+        self,
+        tex=0,
+        width=0,
+        height=0,
+        num_channels=0,
+        filter_mode=0,
+        use_normalized_coords=1,
+        address_mode_u=0,
+        address_mode_v=0,
+    ):
         self.tex = tex
         self.width = width
         self.height = height
         self.num_channels = num_channels
         self.filter_mode = filter_mode
         self.use_normalized_coords = use_normalized_coords
+        self.address_mode_u = address_mode_u
+        self.address_mode_v = address_mode_v
 
 
 class texture3d_t(ctypes.Structure):
@@ -131,9 +146,24 @@ class texture3d_t(ctypes.Structure):
         ("num_channels", ctypes.c_int32),
         ("filter_mode", ctypes.c_int32),
         ("use_normalized_coords", ctypes.c_int32),
+        ("address_mode_u", ctypes.c_int32),
+        ("address_mode_v", ctypes.c_int32),
+        ("address_mode_w", ctypes.c_int32),
     )
 
-    def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mode=0, use_normalized_coords=1):
+    def __init__(
+        self,
+        tex=0,
+        width=0,
+        height=0,
+        depth=0,
+        num_channels=0,
+        filter_mode=0,
+        use_normalized_coords=1,
+        address_mode_u=0,
+        address_mode_v=0,
+        address_mode_w=0,
+    ):
         self.tex = tex
         self.width = width
         self.height = height
@@ -141,6 +171,9 @@ def __init__(self, tex=0, width=0, height=0, depth=0, num_channels=0, filter_mod
         self.num_channels = num_channels
         self.filter_mode = filter_mode
         self.use_normalized_coords = use_normalized_coords
+        self.address_mode_u = address_mode_u
+        self.address_mode_v = address_mode_v
+        self.address_mode_w = address_mode_w
 
 
 class cuda_array_desc_t(ctypes.Structure):
@@ -178,11 +211,18 @@ class Texture:
     floats in [0, 1]; signed integer textures are normalized to [-1, 1]; float types are returned as-is.
 
     .. warning::
-        **Automatic differentiation is only correct when all texture address modes are set to BORDER.**
-        Using ``wp.texture_sample()`` with ``requires_grad=True`` on textures with WRAP, CLAMP, or
-        MIRROR address modes will produce silent gradient errors. The gradient computation zeros out
-        when sampling positions straddle texture boundaries, which is correct for BORDER mode but
-        incorrect for other modes where the forward pass returns valid interpolated data.
+        **Automatic differentiation with LINEAR filtering is only correct when all texture
+        address modes are set to BORDER.**
+
+        Using ``wp.texture_sample()`` with ``requires_grad=True``, ``filter_mode=LINEAR``,
+        and any address mode other than BORDER (WRAP/CLAMP/MIRROR) does not produce usable
+        gradients: the adjoint only implements BORDER behavior (zero outside bounds), so for
+        other modes it prints an error message and contributes no gradient.
+
+        If you need automatic differentiation with LINEAR filtering, create textures with
+        ``address_mode=wp.TextureAddressMode.BORDER``. CLOSEST filtering does not have this
+        limitation (gradients are always zero).
+
 
     This class should not be instantiated directly. A specific subclass should be used instead
     (:class:`Texture1D`, :class:`Texture2D`, or :class:`Texture3D`).
@@ -283,25 +323,6 @@ def __init__(
             self._resolve_address_mode(address_mode, address_mode_w, 2) if ndim > 2 else TextureAddressMode.CLAMP
         )
 
-        # Warn if using non-BORDER address modes (differentiation only supports BORDER)
-        non_border_modes = []
-        if address_mode_u != TextureAddressMode.BORDER:
-            non_border_modes.append(f"U={TextureAddressMode(address_mode_u).name}")
-        if ndim > 1 and address_mode_v != TextureAddressMode.BORDER:
-            non_border_modes.append(f"V={TextureAddressMode(address_mode_v).name}")
-        if ndim > 2 and address_mode_w != TextureAddressMode.BORDER:
-            non_border_modes.append(f"W={TextureAddressMode(address_mode_w).name}")
-
-        if non_border_modes:
-            warnings.warn(
-                f"Texture created with non-BORDER address mode(s): {', '.join(non_border_modes)}. "
-                f"Automatic differentiation (wp.texture_sample with requires_grad=True) only produces "
-                f"correct gradients when all address modes are BORDER. Non-BORDER modes will silently "
-                f"return incorrect gradients at texture boundaries.",
-                UserWarning,
-                stacklevel=2,
-            )
-
         # if an external CUDA array was given, infer texture shape and dtype from it
         if cuda_array:
             if not device.is_cuda:
@@ -1001,6 +1022,7 @@ def __ctype__(self) -> texture1d_t:
             self._num_channels,
             int(self._filter_mode),
             int(self._normalized_coords),
+            int(self._address_mode_u),
         )
 
 
@@ -1085,6 +1107,8 @@ def __ctype__(self) -> texture2d_t:
             self._num_channels,
             int(self._filter_mode),
             int(self._normalized_coords),
+            int(self._address_mode_u),
+            int(self._address_mode_v),
         )
 
 
@@ -1174,6 +1198,9 @@ def __ctype__(self) -> texture3d_t:
             self._num_channels,
             int(self._filter_mode),
             int(self._normalized_coords),
+            int(self._address_mode_u),
+            int(self._address_mode_v),
+            int(self._address_mode_w),
         )
 
 
diff --git a/warp/native/texture.h b/warp/native/texture.h
index e4f0ea2b73..90fc6e0f00 100644
--- a/warp/native/texture.h
+++ b/warp/native/texture.h
@@ -157,6 +157,7 @@ struct texture1d_t {
     int32 num_channels;
     int32 filter_mode;
     int32 use_normalized_coords;
+    int32 address_mode_u;
 
     CUDA_CALLABLE inline texture1d_t()
         : tex(0)
@@ -164,17 +165,24 @@ struct texture1d_t {
         , num_channels(0)
         , filter_mode(0)
         , use_normalized_coords(1)
+        , address_mode_u(0)
     {
     }
 
     CUDA_CALLABLE inline texture1d_t(
-        uint64 tex, int32 width, int32 num_channels, int32 filter_mode, int32 use_normalized_coords
+        uint64 tex,
+        int32 width,
+        int32 num_channels,
+        int32 filter_mode,
+        int32 use_normalized_coords,
+        int32 address_mode_u
     )
         : tex(tex)
         , width(width)
         , num_channels(num_channels)
         , filter_mode(filter_mode)
         , use_normalized_coords(use_normalized_coords)
+        , address_mode_u(address_mode_u)
     {
     }
 };
@@ -186,6 +194,8 @@ struct texture2d_t {
     int32 num_channels;
     int32 filter_mode;
     int32 use_normalized_coords;
+    int32 address_mode_u;
+    int32 address_mode_v;
 
     CUDA_CALLABLE inline texture2d_t()
         : tex(0)
@@ -194,11 +204,20 @@ struct texture2d_t {
         , num_channels(0)
         , filter_mode(0)
         , use_normalized_coords(1)
+        , address_mode_u(0)
+        , address_mode_v(0)
     {
     }
 
     CUDA_CALLABLE inline texture2d_t(
-        uint64 tex, int32 width, int32 height, int32 num_channels, int32 filter_mode, int32 use_normalized_coords
+        uint64 tex,
+        int32 width,
+        int32 height,
+        int32 num_channels,
+        int32 filter_mode,
+        int32 use_normalized_coords,
+        int32 address_mode_u,
+        int32 address_mode_v
     )
         : tex(tex)
         , width(width)
@@ -206,6 +225,8 @@ struct texture2d_t {
         , num_channels(num_channels)
         , filter_mode(filter_mode)
         , use_normalized_coords(use_normalized_coords)
+        , address_mode_u(address_mode_u)
+        , address_mode_v(address_mode_v)
     {
     }
 };
@@ -218,6 +239,9 @@ struct texture3d_t {
     int32 num_channels;
     int32 filter_mode;
     int32 use_normalized_coords;
+    int32 address_mode_u;
+    int32 address_mode_v;
+    int32 address_mode_w;
 
     CUDA_CALLABLE inline texture3d_t()
         : tex(0)
@@ -227,6 +251,9 @@ struct texture3d_t {
         , num_channels(0)
         , filter_mode(0)
         , use_normalized_coords(1)
+        , address_mode_u(0)
+        , address_mode_v(0)
+        , address_mode_w(0)
     {
     }
 
@@ -237,7 +264,10 @@ struct texture3d_t {
         int32 depth,
         int32 num_channels,
         int32 filter_mode,
-        int32 use_normalized_coords
+        int32 use_normalized_coords,
+        int32 address_mode_u,
+        int32 address_mode_v,
+        int32 address_mode_w
     )
         : tex(tex)
         , width(width)
@@ -246,6 +276,9 @@ struct texture3d_t {
         , num_channels(num_channels)
         , filter_mode(filter_mode)
         , use_normalized_coords(use_normalized_coords)
+        , address_mode_u(address_mode_u)
+        , address_mode_v(address_mode_v)
+        , address_mode_w(address_mode_w)
     {
     }
 };
@@ -882,22 +915,23 @@ adj_texture_sample(const texture1d_t& tex, float u, texture1d_t& adj_tex, float&
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
-#ifndef NDEBUG
-// Warning: This check is only active in debug builds
-// Differentiation is only correct for BORDER address mode
-#if !defined(__CUDA_ARCH__)
-    if (tex.tex != 0) {
-        const Texture* cpu_tex = (const Texture*)tex.tex;
-        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER) {
-            printf(
-                "WARNING: texture_sample adjoint may produce incorrect gradients. "
-                "Address mode is %d but differentiation only supports BORDER mode (3).\n",
-                cpu_tex->address_mode_u
-            );
-        }
-    }
-#endif
+    // Check address mode compatibility with differentiation
+    if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER) {
+#if defined(__CUDA_ARCH__)
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address_mode_u=%d. Gradients will be incorrect.\n",
+            tex.address_mode_u
+        );
+#else
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address_mode_u=%d. Gradients will be incorrect.\n",
+            tex.address_mode_u
+        );
 #endif
+        return;  // Return zero gradient
+    }
 
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
 
@@ -957,21 +991,22 @@ CUDA_CALLABLE void adj_texture_sample(
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
-#ifndef NDEBUG
-#if !defined(__CUDA_ARCH__)
-    if (tex.tex != 0) {
-        const Texture* cpu_tex = (const Texture*)tex.tex;
-        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER
-            || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER) {
-            printf(
-                "WARNING: texture_sample adjoint may produce incorrect gradients. "
-                "Address modes are (%d, %d) but differentiation only supports BORDER mode (3).\n",
-                cpu_tex->address_mode_u, cpu_tex->address_mode_v
-            );
-        }
-    }
-#endif
+    if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER || tex.address_mode_v != WP_TEXTURE_ADDRESS_BORDER) {
+#if defined(__CUDA_ARCH__)
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address modes (%d, %d). Gradients will be incorrect.\n",
+            tex.address_mode_u, tex.address_mode_v
+        );
+#else
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address modes (%d, %d). Gradients will be incorrect.\n",
+            tex.address_mode_u, tex.address_mode_v
+        );
 #endif
+        return;
+    }
 
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
     float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;
@@ -1080,21 +1115,23 @@ CUDA_CALLABLE void adj_texture_sample(
     if (tex.filter_mode == WP_TEXTURE_FILTER_CLOSEST)
         return;
 
-#ifndef NDEBUG
-#if !defined(__CUDA_ARCH__)
-    if (tex.tex != 0) {
-        const Texture* cpu_tex = (const Texture*)tex.tex;
-        if (cpu_tex->address_mode_u != WP_TEXTURE_ADDRESS_BORDER || cpu_tex->address_mode_v != WP_TEXTURE_ADDRESS_BORDER
-            || cpu_tex->address_mode_w != WP_TEXTURE_ADDRESS_BORDER) {
-            printf(
-                "WARNING: texture_sample adjoint may produce incorrect gradients. "
-                "Address modes are (%d, %d, %d) but differentiation only supports BORDER mode (3).\n",
-                cpu_tex->address_mode_u, cpu_tex->address_mode_v, cpu_tex->address_mode_w
-            );
-        }
-    }
-#endif
+    if (tex.address_mode_u != WP_TEXTURE_ADDRESS_BORDER || tex.address_mode_v != WP_TEXTURE_ADDRESS_BORDER
+        || tex.address_mode_w != WP_TEXTURE_ADDRESS_BORDER) {
+#if defined(__CUDA_ARCH__)
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address modes (%d, %d, %d). Gradients will be incorrect.\n",
+            tex.address_mode_u, tex.address_mode_v, tex.address_mode_w
+        );
+#else
+        printf(
+            "ERROR: texture_sample gradient computation requires BORDER address mode. "
+            "Texture has address modes (%d, %d, %d). Gradients will be incorrect.\n",
+            tex.address_mode_u, tex.address_mode_v, tex.address_mode_w
+        );
 #endif
+        return;
+    }
 
     float gtx_mult = tex.use_normalized_coords ? (float)tex.width : 1.0f;
     float gty_mult = tex.use_normalized_coords ? (float)tex.height : 1.0f;
diff --git a/warp/tests/cuda/test_texture.py b/warp/tests/cuda/test_texture.py
index b008a6eafc..46780f1056 100644
--- a/warp/tests/cuda/test_texture.py
+++ b/warp/tests/cuda/test_texture.py
@@ -2677,6 +2677,63 @@ def _grad_3d(data, coord, device):
     return pos.grad.numpy()[0]
 
 
+def _grad_1d_normalized(data, u_normalized, device):
+    """Backpropagate ``sample_1d`` at normalized coordinate ``u_normalized``; return d(out)/d(u)."""
+    tex = wp.Texture1D(
+        data,
+        normalized_coords=True,  # normalized coordinates (the default), rather than texel units
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([u_normalized], dtype=float, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_1d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
+def _grad_2d_normalized(data, coord_normalized, device):
+    """Backpropagate ``sample_2d`` at normalized ``coord_normalized``; return the gradient w.r.t. the coordinate."""
+    tex = wp.Texture2D(
+        data,
+        normalized_coords=True,  # normalized coordinates (the default), rather than texel units
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([wp.vec2f(*coord_normalized)], dtype=wp.vec2f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_2d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
+def _grad_3d_normalized(data, coord_normalized, device):
+    """Backpropagate ``sample_3d`` at normalized ``coord_normalized``; return the gradient w.r.t. the coordinate."""
+    tex = wp.Texture3D(
+        data,
+        normalized_coords=True,  # normalized coordinates (the default), rather than texel units
+        filter_mode=wp.TextureFilterMode.LINEAR,
+        address_mode=wp.TextureAddressMode.BORDER,
+        device=device,
+    )
+    pos = wp.array([wp.vec3f(*coord_normalized)], dtype=wp.vec3f, requires_grad=True, device=device)
+    out = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(sample_3d, dim=1, inputs=[tex, pos], outputs=[out], device=device)
+    out.grad = wp.ones(1, dtype=float, device=device)
+    tape.backward()
+    return pos.grad.numpy()[0]
+
+
 def test_texture1d_adj_boundary_zero(test, device):
     """Gradient is zero when sampling position straddles the near boundary."""
     data = np.random.default_rng(0).standard_normal(16).astype(np.float32)
@@ -2937,6 +2994,68 @@ def sample_3d_vec2(tex: wp.Texture3D, pos: wp.array(dtype=wp.vec3f), out: wp.arr
     np.testing.assert_allclose(g[2], 1.0 / D, atol=1e-5)
 
 
+def test_texture1d_adj_normalized_boundary(test, device):
+    """1D normalized coords: gradient is zero when the sample straddles either boundary (u=0.01, u=0.99)."""
+    data = np.random.default_rng(10).standard_normal(16).astype(np.float32)
+    g_near = _grad_1d_normalized(data, 0.01, device)
+    g_far = _grad_1d_normalized(data, 0.99, device)
+    np.testing.assert_allclose(g_near, 0.0, atol=1e-6)
+    np.testing.assert_allclose(g_far, 0.0, atol=1e-6)
+
+
+def test_texture1d_adj_normalized_linear(test, device):
+    """1D normalized coords: a ramp of slope 1/W per texel has gradient 1.0 per unit of normalized u."""
+    W = 16
+    data = np.arange(W, dtype=np.float32) / W
+    # Chain rule: d(value)/d(u_norm) = d(value)/d(u_texel) * d(u_texel)/d(u_norm)
+    # = (1/W) * W = 1.0
+    g = _grad_1d_normalized(data, 0.5, device)
+    np.testing.assert_allclose(g, 1.0, atol=1e-4)
+
+
+def test_texture2d_adj_normalized_boundary(test, device):
+    """2D normalized coords: both gradient components are zero near the (0.01, 0.01) and (0.99, 0.99) corners."""
+    data = np.random.default_rng(11).standard_normal((8, 10)).astype(np.float32)
+    g_near = _grad_2d_normalized(data, (0.01, 0.01), device)
+    g_far = _grad_2d_normalized(data, (0.99, 0.99), device)
+    np.testing.assert_allclose(g_near, [0.0, 0.0], atol=1e-6)
+    np.testing.assert_allclose(g_far, [0.0, 0.0], atol=1e-6)
+
+
+def test_texture2d_adj_normalized_linear_x(test, device):
+    """2D normalized coords: a ramp along x only yields gradient (1.0, 0.0) at the texture center."""
+    H, W = 6, 10
+    data = np.zeros((H, W), dtype=np.float32)
+    for x in range(W):
+        data[:, x] = x / W
+    g = _grad_2d_normalized(data, (0.5, 0.5), device)
+    # Chain rule: (1/W per texel) * (W texels per unit of u_norm) = 1.0
+    np.testing.assert_allclose(g[0], 1.0, atol=1e-4)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+
+
+def test_texture3d_adj_normalized_boundary(test, device):
+    """3D normalized coords: all gradient components are zero near the 0.01 and 0.99 corners."""
+    data = np.random.default_rng(12).standard_normal((8, 6, 10)).astype(np.float32)
+    g_near = _grad_3d_normalized(data, (0.01, 0.01, 0.01), device)
+    g_far = _grad_3d_normalized(data, (0.99, 0.99, 0.99), device)
+    np.testing.assert_allclose(g_near, [0.0, 0.0, 0.0], atol=1e-6)
+    np.testing.assert_allclose(g_far, [0.0, 0.0, 0.0], atol=1e-6)
+
+
+def test_texture3d_adj_normalized_linear_z(test, device):
+    """3D normalized coords: a ramp along z only yields gradient (0.0, 0.0, 1.0) at the texture center."""
+    D, H, W = 8, 6, 10
+    data = np.zeros((D, H, W), dtype=np.float32)
+    for z in range(D):
+        data[z, :, :] = z / D
+    g = _grad_3d_normalized(data, (0.5, 0.5, 0.5), device)
+    # Chain rule: (1/D per texel) * (D texels per unit of w_norm) = 1.0
+    np.testing.assert_allclose(g[0], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[1], 0.0, atol=1e-5)
+    np.testing.assert_allclose(g[2], 1.0, atol=1e-4)
+
+
 # ============================================================================
 # Test Class
 # ============================================================================
@@ -3237,6 +3356,24 @@ class TestTexture(unittest.TestCase):
 add_function_test(
     TestTexture, "test_texture3d_adj_vec2f_linear_z", test_texture3d_adj_vec2f_linear_z, devices=all_devices
 )
+add_function_test(
+    TestTexture, "test_texture1d_adj_normalized_boundary", test_texture1d_adj_normalized_boundary, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture1d_adj_normalized_linear", test_texture1d_adj_normalized_linear, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture2d_adj_normalized_boundary", test_texture2d_adj_normalized_boundary, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture2d_adj_normalized_linear_x", test_texture2d_adj_normalized_linear_x, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture3d_adj_normalized_boundary", test_texture3d_adj_normalized_boundary, devices=all_devices
+)
+add_function_test(
+    TestTexture, "test_texture3d_adj_normalized_linear_z", test_texture3d_adj_normalized_linear_z, devices=all_devices
+)
 
 
 if __name__ == "__main__":