Source code for katgpucbf.mapped_array

################################################################################
# Copyright (c) 2023-2024, National Research Foundation (SARAO)
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use
# this file except in compliance with the License. You may obtain a copy
# of the License at
#
#   https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

"""Implement :class:`MappedArray`."""

from dataclasses import dataclass
from typing import Self

import katsdpsigproc.cuda
import numpy as np
import vkgdr
from katsdpsigproc import accel
from katsdpsigproc.abc import AbstractContext, AbstractDevice


def make_vkgdr(device: AbstractDevice) -> vkgdr.Vkgdr:
    """Open a :class:`vkgdr.Vkgdr` handle on the GPU behind `device`.

    The handle is opened with a requirement for coherent mappings, which is
    what :class:`MappedArray` relies on.

    Parameters
    ----------
    device
        Device to open. It must be a :class:`katsdpsigproc.cuda.Device`.

    Raises
    ------
    TypeError
        If `device` is not a CUDA device.
    """
    if isinstance(device, katsdpsigproc.cuda.Device):
        pycuda_device = device._pycuda_device
    else:
        raise TypeError("device is not a CUDA device")

    # HACK: pycuda.driver.Device offers no accessor for the underlying
    # CUdevice, but its __hash__ implementation happens to return it.
    cu_device = hash(pycuda_device)

    # Non-coherent mappings with explicit flushing would also be workable,
    # but NVIDIA currently only provides host-coherent memory, so requiring
    # coherence keeps things simple.
    open_flags = vkgdr.OpenFlags.REQUIRE_COHERENT_BIT
    return vkgdr.Vkgdr(cu_device, open_flags)
@dataclass
class MappedArray:
    """A single buffer exposed both to the host and to the device.

    Writes made through the host view with ordinary numpy code become
    visible to kernels that are enqueued on the device afterwards.

    Attributes
    ----------
    host
        Host-side numpy view of the buffer.
    device
        Device-side view of the same buffer.
    """

    host: np.ndarray
    device: accel.DeviceArray

    @classmethod
    def from_slot(cls, vkgdr_handle: vkgdr.Vkgdr, context: AbstractContext, slot: accel.IOSlotBase) -> Self:
        """Build a :class:`MappedArray` whose layout matches `slot`.

        Parameters
        ----------
        vkgdr_handle
            vkgdr handle used to allocate the memory. It must have been
            created from the same device as `context`.
        context
            CUDA context in which the device view will be used.
        slot
            Source of the dtype, shape and padded shape. It is annotated as
            :class:`~katsdpsigproc.accel.IOSlotBase` for convenience, but
            it must actually be an :class:`~katsdpsigproc.accel.IOSlot`.

        Returns
        -------
        Self
            A new instance whose host and device views are backed by the
            same vkgdr allocation.
        """
        # Narrows the type for static checkers as well as checking at runtime.
        assert isinstance(slot, accel.IOSlot)
        padded_shape = slot.required_padded_shape()
        dtype = slot.dtype
        logical_shape = slot.shape

        # The allocation covers the full padded extent, not just the
        # logical shape.
        n_bytes = int(np.prod(padded_shape)) * dtype.itemsize
        with context:
            memory = vkgdr.pycuda.Memory(vkgdr_handle, n_bytes)

        # Reinterpret the mapping with the slot's dtype and padded layout,
        # then trim every axis back to the logical extent.
        padded_view = np.asarray(memory).view(dtype).reshape(padded_shape)
        window = tuple(slice(0, extent) for extent in logical_shape)
        host_view = padded_view[window]

        device_view = accel.DeviceArray(context, logical_shape, dtype, padded_shape, raw=memory)
        return cls(host=host_view, device=device_view)