Source code for katgpucbf.mapped_array

################################################################################
# Copyright (c) 2023-2024, National Research Foundation (SARAO)
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy
# of the License at
#
#   https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

"""Implement :class:`MappedArray`."""

from dataclasses import dataclass
from typing import Self

import katsdpsigproc.cuda
import numpy as np
import vkgdr
import vkgdr.pycuda
from katsdpsigproc import accel
from katsdpsigproc.abc import AbstractContext, AbstractDevice



[docs]
def make_vkgdr(device: AbstractDevice) -> vkgdr.Vkgdr:
    """Open a :class:`vkgdr.Vkgdr` handle on the GPU behind `device`.

    Only CUDA devices are supported. The handle is opened with the
    requirement that mappings are host-coherent, which makes it suitable
    for use with :class:`MappedArray`.

    Raises
    ------
    TypeError
        If `device` is not a :class:`katsdpsigproc.cuda.Device`.
    """
    if isinstance(device, katsdpsigproc.cuda.Device):
        # Hack: pycuda.driver.Device offers no accessor for the underlying
        # CUdevice, but its __hash__ implementation happens to return it.
        cu_device = hash(device._pycuda_device)
        # Non-coherent mappings with explicit flushing would also be easy
        # to support, but NVIDIA currently only provides host-coherent
        # memory, so requiring coherence is the simpler option.
        return vkgdr.Vkgdr(cu_device, vkgdr.OpenFlags.REQUIRE_COHERENT_BIT)
    raise TypeError("device is not a CUDA device")




[docs]
@dataclass
class MappedArray:
    """An array visible in the address space of both the host and the device.

    The host view can be updated using normal numpy code, and the changes will
    be visible to subsequently enqueued kernels on the device.

    Attributes
    ----------
    host
        Host-side numpy view of the memory.
    device
        Device-side array backed by the same memory as `host`.
    """

    host: np.ndarray
    device: accel.DeviceArray

    @classmethod
    def from_slot(cls, vkgdr_handle: vkgdr.Vkgdr, context: AbstractContext, slot: accel.IOSlotBase) -> Self:
        """Allocate a :class:`MappedArray` to match a slot.

        Parameters
        ----------
        vkgdr_handle
            Handle for allocating memory from vkgdr. It must be created from the same device
            as `context`.
        context
            CUDA context in which the device view will be used.
        slot
            Slot from which the dtype, shape and padded shape will be
            extracted. The parameter is annotated as
            :class:`~katsdpsigproc.accel.IOSlotBase` for convenience, but it
            must actually be an instance of :class:`~katsdpsigproc.accel.IOSlot`.

        Returns
        -------
        MappedArray
            A new instance whose `host` and `device` views share a single
            allocation, each with the shape and dtype of `slot`.

        Raises
        ------
        TypeError
            If `slot` is not an instance of :class:`~katsdpsigproc.accel.IOSlot`.
        """
        # Use an explicit check rather than ``assert`` so that it is still
        # enforced when Python runs with -O, and so that a wrong argument type
        # is reported the same way as in make_vkgdr.
        if not isinstance(slot, accel.IOSlot):
            raise TypeError("slot is not an instance of katsdpsigproc.accel.IOSlot")
        padded_shape = slot.required_padded_shape()
        # The allocation must cover the padded shape, not just the visible one
        n_bytes = int(np.prod(padded_shape)) * slot.dtype.itemsize
        with context:
            handle = vkgdr.pycuda.Memory(vkgdr_handle, n_bytes)
        # Slice out the shape from the padded shape
        index = tuple(slice(0, x) for x in slot.shape)
        # Reinterpret the mapping as `slot.dtype` with the padded shape, then
        # trim the padding so the host view has exactly `slot.shape`.
        host = np.asarray(handle).view(slot.dtype).reshape(padded_shape)[index]
        # Passing raw=handle makes the device array use the same allocation
        # that the host view refers to.
        device = accel.DeviceArray(context, slot.shape, slot.dtype, padded_shape, raw=handle)
        return cls(host=host, device=device)