Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
284e2d1
Add Array and ArrayFormat to cuda.core (refs #467)
rparolin May 13, 2026
a679885
Add copy_from / copy_to to cuda.core.Array (refs #467)
rparolin May 13, 2026
a4a5d5e
Add TextureObject and ResourceDescriptor/TextureDescriptor (refs #467)
rparolin May 13, 2026
0a0948a
Add SurfaceObject for kernel-side typed load/store (refs #467)
rparolin May 14, 2026
27b4554
Add ResourceDescriptor.from_linear and .from_pitch2d (refs #467)
rparolin May 14, 2026
168a248
Add docs for texture/surface APIs (refs #467)
rparolin May 14, 2026
53f993e
Add MipmappedArray and ResourceDescriptor.from_mipmapped_array (refs …
rparolin May 14, 2026
229465e
Add texture sampling example (refs #467)
rparolin May 14, 2026
e653b68
Merge branch 'main' into feature/cuda-core-texture-surface-467
rparolin May 15, 2026
aec7fed
Address code review feedback on texture/surface stack (refs #467)
rparolin May 15, 2026
cd68c99
Add 9 cuda.core texture/surface examples (refs #467)
rparolin May 15, 2026
1432c0a
cuda.core: rename Array->CUDAArray, surface_load_store->is_surface_lo…
rparolin Jun 10, 2026
fbe880a
Merge branch 'main' into feature/cuda-core-texture-surface-467
rparolin Jun 10, 2026
264f2e6
cuda.core: fix lint/mypy after merging main; add generated .pyi stubs
rparolin Jun 10, 2026
7982308
cuda.core: rename surface_load_store ctor keyword to is_surface_load_…
rparolin Jun 10, 2026
5673ddb
cuda.core: add 7 texture/surface graphics examples
rparolin Jun 10, 2026
088115b
cuda.core: dedup texture/array validation; fix docstring + address_mo…
rparolin Jun 11, 2026
cf9441c
cuda.core: remove dead _context field and orphaned helper
rparolin Jun 11, 2026
d8c2db6
caustics improvements
rparolin Jun 11, 2026
a8ae98b
cuda.core: add numba-cuda-mlir port of the Stable Fluids example
rparolin Jun 12, 2026
554839f
cuda.core: satisfy ruff on the numba-cuda-mlir fluid example
rparolin Jun 12, 2026
8016ece
Merge remote-tracking branch 'upstream/main' into feature/cuda-core-t…
rparolin Jun 12, 2026
ceeb814
cuda.core: isolate numba-cuda-mlir in its own env (fix texture exampl…
rparolin Jun 15, 2026
e087f0b
cuda.core: route texture/surface/array lifetime through resource-hand…
rparolin Jun 17, 2026
681e046
cuda.core: address #2188 review feedback — copy parity + cuda.core.te…
rparolin Jun 17, 2026
4b4f942
cuda.core: document CUDAArray copy-only interop contract (#2188 decis…
rparolin Jun 17, 2026
aae6af0
cuda.core: scope texture/surface PR to the core API + fluid examples …
rparolin Jun 17, 2026
940858f
cuda.core: keep the mipmap/LOD example in the texture/surface PR
rparolin Jun 17, 2026
ca73fb4
cuda.core: fix test_utils parametrize collection error on pytest 8.4+
rparolin Jun 17, 2026
8212401
cuda.core: satisfy pre-commit (cython-lint, stubs, formatting)
rparolin Jun 17, 2026
cafcc3a
Merge branch 'main' into feature/cuda-core-texture-surface-467
rparolin Jun 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cuda_core/cuda/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,12 @@ class _PatchedProperty(metaclass=_PatchedPropMeta):
from cuda.core._tensor_map import TensorMapDescriptor, TensorMapDescriptorOptions

# isort: split
# Texture/surface types live under the cuda.core.textures namespace (not the
# flat cuda.core namespace); `import cuda.core.textures` below makes the
# subpackage available as `cuda.core.textures` after `import cuda.core`.
#
# `import cuda.core.graph` must come after the cuda.core._* extension imports
# above: loading graph earlier interacts badly with the merged-wheel __path__
# rewrite and leaves Graph/GraphBuilder/GraphCompleteOptions/
# GraphDebugPrintOptions missing from cuda.core.graph.
import cuda.core.graph
import cuda.core.textures
27 changes: 27 additions & 0 deletions cuda_core/cuda/core/_array.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from cuda.bindings cimport cydriver
from cuda.core._resource_handles cimport ArrayHandle


cdef class CUDAArray:
    # C-level attribute layout for the CUDAArray extension type. Only the
    # declarations live here; the implementation is in the matching .pyx.

    cdef:
        # Owning/non-owning + any parent (mipmap) dependency are encoded
        # structurally in the C++ box behind this handle, not in Python state.
        ArrayHandle _handle
        tuple _shape  # (w,), (w, h), or (w, h, d)
        cydriver.CUarray_format _format  # driver element format of the array
        unsigned int _num_channels  # 1, 2, or 4
        # NOTE(review): presumably the ordinal of the device the array lives
        # on -- confirm against the .pyx.
        int _device_id
        # NOTE(review): presumably the cached "surface load/store" flag of the
        # array -- confirm against the .pyx.
        bint _surface_load_store

    # Declared cpdef, so it is callable from both Cython and Python code.
    cpdef close(self)


# Wrap an existing ArrayHandle as a CUDAArray. The array's shape, format,
# channel count, and surface-flag metadata are queried from the driver; the
# device id is not queried and must be supplied by the caller. Used by
# get_level and the graphics-interop _from_handle path.
cdef CUDAArray _array_from_handle(ArrayHandle h, int device_id)
174 changes: 174 additions & 0 deletions cuda_core/cuda/core/_array.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# This file was generated by stubgen-pyx v0.2.6 from cuda_core/cuda/core/_array.pyx

from __future__ import annotations

from enum import IntEnum

from cuda.bindings import cydriver


class ArrayFormat(IntEnum):
    """Element format for a :class:`CUDAArray` allocation.

    Mirrors ``CUarray_format`` from the CUDA driver API. Each member's value
    is the corresponding ``CU_AD_FORMAT_*`` driver constant.
    """

    # Unsigned integer formats.
    UINT8 = cydriver.CU_AD_FORMAT_UNSIGNED_INT8
    UINT16 = cydriver.CU_AD_FORMAT_UNSIGNED_INT16
    UINT32 = cydriver.CU_AD_FORMAT_UNSIGNED_INT32
    # Signed integer formats.
    INT8 = cydriver.CU_AD_FORMAT_SIGNED_INT8
    INT16 = cydriver.CU_AD_FORMAT_SIGNED_INT16
    INT32 = cydriver.CU_AD_FORMAT_SIGNED_INT32
    # Floating-point formats (the driver names the 16-bit float "HALF").
    FLOAT16 = cydriver.CU_AD_FORMAT_HALF
    FLOAT32 = cydriver.CU_AD_FORMAT_FLOAT

class CUDAArray:
    """An opaque, hardware-laid-out GPU allocation for texture/surface access.

    Distinct from :class:`Buffer`: a ``CUarray`` has no exposed device pointer
    and can only be accessed from kernels through a :class:`TextureObject` or
    :class:`SurfaceObject`. Its memory layout is chosen by the driver for 2D/3D
    spatial locality.

    **Copy-only interop.** Because the layout is opaque and there is no linear
    device pointer, a ``CUDAArray`` cannot expose ``__cuda_array_interface__`` /
    DLPack and cannot be shared zero-copy with NumPy, CuPy, numba-cuda, or
    PyTorch. Moving data in or out is therefore always a copy: use
    :meth:`copy_from` / :meth:`copy_to` against a linear :class:`Buffer` or a
    host buffer-protocol object. There is no allocation helper — allocate the
    linear :class:`Buffer` yourself (e.g. ``mr.allocate(arr.size_bytes,
    stream=s)``) and copy.

    Construct via :meth:`from_descriptor`. Only plain 1D/2D/3D allocations are
    supported in this initial version; layered/cubemap/sparse variants will
    follow once their shape semantics are settled.
    """

    def close(self):
        """Release this object's reference to the underlying ``CUarray``.

        Destruction (``cuArrayDestroy``) happens via the handle's deleter when
        the last reference is dropped; for a non-owning handle (graphics interop
        or a mipmap-level view) nothing is destroyed. Idempotent: a second call
        (or destruction after ``close()``) is a no-op.
        """

    # NOTE(review): the class docstring directs users to from_descriptor();
    # presumably direct construction is rejected -- confirm in _array.pyx.
    def __init__(self, *args, **kwargs):
        ...

    @classmethod
    def from_descriptor(cls, *, shape, format, num_channels, is_surface_load_store=False) -> CUDAArray:
        """Allocate a new CUDA array.

        All arguments are keyword-only.

        Parameters
        ----------
        shape : tuple of int
            ``(width,)``, ``(width, height)``, or ``(width, height, depth)``
            in elements.
        format : ArrayFormat
            Element format.
        num_channels : int
            Channels per element. Must be 1, 2, or 4.
        is_surface_load_store : bool
            If True, allocate with ``CUDA_ARRAY3D_SURFACE_LDST`` so the array
            can be bound as a :class:`SurfaceObject` for kernel-side writes.
            Default False.

        Returns
        -------
        CUDAArray
        """

    @classmethod
    def _from_handle(cls, handle: int, owning: bool, *, device_id=None):
        """Wrap an externally-allocated ``CUarray``.

        Intended for graphics interop (``cuGraphicsSubResourceGetMappedArray``)
        where the array is owned by the graphics API. With ``owning=False`` the
        underlying ``CUarray`` is never destroyed by this object. Shape, format,
        and channel count are queried from the driver.
        """

    @property
    def handle(self) -> int:
        """The underlying ``CUarray`` as an integer."""

    @property
    def shape(self):
        """Allocation shape, in elements."""

    @property
    def format(self) -> ArrayFormat:
        """The element :class:`ArrayFormat`."""

    @property
    def num_channels(self) -> int:
        """Channels per element (1, 2, or 4)."""

    @property
    def element_size(self) -> int:
        """Bytes per element (format size * channels)."""

    @property
    def device(self):
        """The :class:`Device` this array was allocated on."""

    @property
    def is_surface_load_store(self) -> bool:
        """Whether this array can be bound as a :class:`SurfaceObject`.

        True if this array was created with ``CUDA_ARRAY3D_SURFACE_LDST``.
        """

    def _extent_bytes(self):
        """Return ``(width_bytes, height, depth)`` for cuMemcpy3D.

        Height and depth are normalized to >=1 for lower-rank arrays.
        """

    def copy_from(self, src, *, stream) -> None:
        """Copy a full array's worth of data into this array.

        Parameters
        ----------
        src : Buffer or buffer-protocol object
            Source data. Must contain at least ``self.size_bytes`` bytes
            of contiguous data.
        stream : Stream or GraphBuilder
            Stream to issue the copy on. A :class:`~cuda.core.graph.GraphBuilder`
            is accepted so the copy can be captured into a graph.
        """

    def copy_to(self, dst, *, stream):
        """Copy a full array's worth of data out of this array.

        Parameters
        ----------
        dst : Buffer or writable buffer-protocol object
            Destination. Must have at least ``self.size_bytes`` bytes of
            writable, contiguous space.
        stream : Stream or GraphBuilder
            Stream to issue the copy on. A :class:`~cuda.core.graph.GraphBuilder`
            is accepted so the copy can be captured into a graph.

        Returns
        -------
        The ``dst`` object, for parity with :meth:`Buffer.copy_to`.
        """

    @property
    def size_bytes(self) -> int:
        """Total bytes of array storage (``prod(shape) * element_size``)."""

    # NOTE(review): context-manager protocol; presumably __enter__ returns
    # self and __exit__ calls close() -- confirm in _array.pyx.
    def __enter__(self):
        ...

    def __exit__(self, exc_type, exc, tb):
        ...

    def __repr__(self):
        ...
# Size in bytes of a single channel for each ArrayFormat, keyed by the
# format's integer value. Entries are grouped by width: 1, 2, then 4 bytes.
_FORMAT_ELEM_SIZE = {
    # 8-bit formats
    int(ArrayFormat.UINT8): 1,
    int(ArrayFormat.INT8): 1,
    # 16-bit formats
    int(ArrayFormat.UINT16): 2,
    int(ArrayFormat.INT16): 2,
    int(ArrayFormat.FLOAT16): 2,
    # 32-bit formats
    int(ArrayFormat.UINT32): 4,
    int(ArrayFormat.INT32): 4,
    int(ArrayFormat.FLOAT32): 4,
}

def _validate_format_channels(format, num_channels):
    """Validate a ``(format, num_channels)`` pair.

    Shared by the array, mipmap, and texture factories. Raises on an invalid
    combination.
    """
    # NOTE(review): the stub does not say which exception type is raised or
    # what is returned on success -- confirm in _array.pyx.

def _validate_array_shape(shape) -> tuple[int, ...]:
    """Normalize and validate an array shape.

    Coerces ``shape`` to a tuple of ints, then validates that its rank is
    1-3 and that every extent is >= 1. Returns the normalized tuple.
    """
Loading
Loading