CUDA
The CUDA plugin provides access to the low-level CUDA Driver API within Tellusim SDK, enabling interoperability between Tellusim GPU resources and native CUDA compute workflows.
#include <platform/cuda/include/TellusimCU.h>
Example
The following example demonstrates how to set up a CUDA context, create interoperable buffers, and launch a compute shader kernel using the CUDA Driver API:
// Setup phase of the example: builds the CUDA context, device, compute shader and
// the buffers consumed by the kernel launch that follows.
// Every step bails out with `return false` as soon as an object cannot be created.
// NOTE(review): `device` and `grid_size` are defined outside this snippet.
// create Cuda context for our device
// the context is requested for PlatformCU with the PCI bus ID reported by the existing device
// (presumably so that both contexts refer to the same physical GPU — confirm)
CUContext cu_context = CUContext(Context(PlatformCU, device.getFeatures().pciBusID));
if(!cu_context || !cu_context.create()) {
TS_LOG(Error, "main(): can't create Cuda context\n");
return false;
}
// initialize Cuda
// CU::init() is called after the context is created and before any Tellusim::cu* function is used below
if(!CU::init()) {
TS_LOG(Error, "main(): can't init Cuda\n");
return false;
}
// create Cuda device
// a second Device object, built on top of the Cuda context rather than the rendering context
Device cu_device(cu_context);
if(!cu_device) return false;
// create Cuda shader
// the compute shader is loaded from "main.shader" with the COMPUTE_SHADER=1 macro and cast to the Cuda shader type
CUShader cu_shader = CUShader(cu_device.loadShaderGLSL(Shader::TypeCompute, "main.shader", "COMPUTE_SHADER=1"));
if(!cu_shader) return false;
// create position buffer
// allocated on the original device with the Interop flag; size is four float32 values per cell of a grid_size x grid_size grid
Buffer position_buffer = device.createBuffer(Buffer::FlagStorage | Buffer::FlagVertex | Buffer::FlagInterop, sizeof(float32_t) * 4 * grid_size * grid_size);
if(!position_buffer) return false;
// create Cuda position buffer
// wraps the existing position_buffer instead of allocating new storage
CUBuffer cu_position_buffer = CUBuffer(cu_device.createBuffer(position_buffer));
if(!cu_position_buffer) return false;
// create Cuda uniform buffer
// allocated directly on the Cuda device and sized to hold one ComputeParameters structure
CUBuffer cu_uniform_buffer = CUBuffer(cu_device.createBuffer(Buffer::FlagStorage, sizeof(ComputeParameters)));
if(!cu_uniform_buffer) return false;
// dispatch kernel using low-level CUDA API
// the same result can be achieved with the single cu_compute.dispatch() call
// Steps: make the Cuda context current, upload the parameters, launch the kernel, wait for the stream.
// Every driver call is checked with CUContext::error() and the snippet returns false on failure.
{
    // set current context
    if(CUContext::error(Tellusim::cuCtxSetCurrent(cu_context.getCUContext()))) {
        TS_LOG(Error, "main(): can't set current context\n");
        return false;
    }

    // compute parameters
    // filled on the host and copied into the Cuda uniform buffer before the launch
    ComputeParameters compute_parameters;
    compute_parameters.size = grid_size;
    compute_parameters.scale = scale_slider.getValuef32();
    compute_parameters.time = time;
    if(CUContext::error(Tellusim::cuMemcpyHtoD(cu_uniform_buffer.getBufferPtr(), &compute_parameters, sizeof(compute_parameters)))) {
        TS_LOG(Error, "main(): can't copy uniform parameters\n");
        return false;
    }

    // kernel parameters
    // the kernel receives two device pointers: the uniform buffer and the position buffer
    CUdeviceptr parameters[] = {
        cu_uniform_buffer.getBufferPtr(),
        cu_position_buffer.getBufferPtr(),
    };
    size_t parameters_size = sizeof(CUdeviceptr) * TS_COUNTOF(parameters);

    // launch options
    // the parameters are passed as a single packed buffer through the last cuLaunchKernel() argument:
    // a pointer to the buffer, a pointer to its size in bytes, and the end marker
    void *options[] = {
        CU_LAUNCH_PARAM_BUFFER_POINTER, parameters,
        CU_LAUNCH_PARAM_BUFFER_SIZE, &parameters_size,
        CU_LAUNCH_PARAM_END,
    };

    // launch kernel
    // a 2D grid of num_groups x num_groups blocks, each group_size x group_size threads, no dynamic shared memory;
    // the per-argument pointer array is nullptr because the packed options buffer is used instead
    uint32_t num_groups = udiv(grid_size, group_size);
    if(CUContext::error(Tellusim::cuLaunchKernel(cu_shader.getFunction(), num_groups, num_groups, 1, group_size, group_size, 1, 0, cu_context.getStream(), nullptr, options))) {
        TS_LOG(Error, "main(): can't launch kernel\n");
        return false;
    }

    // synchronize stream
    // blocks until the work submitted to the context stream has finished
    if(CUContext::error(Tellusim::cuStreamSynchronize(cu_context.getStream()))) {
        TS_LOG(Error, "main(): can't synchronize stream\n");
        return false;
    }
}