Cheatography
https://cheatography.com
Important classes and functions in CUDA and CCCL.
This is a draft cheat sheet. It is a work in progress and is not finished yet.
CUDA
Kernel execution |
kernelName<<<num_blocks, num_threads, shared_mem, stream>>>(args);
|
Kernel definition |
__global__ void kernelName(args){…}
|
Create memory on device |
cudaMalloc(&memPtr, sizeInBytes);
|
Free device memory |
cudaFree(memPtr);
|
Create pinned memory on host |
cudaMallocHost(&memPtr, size_in_bytes);
|
Free pinned memory |
cudaFreeHost(memPtr);
|
Create stream |
cudaStreamCreate(&stream);
|
Destroy stream |
cudaStreamDestroy(stream);
|
Wait for Device |
cudaDeviceSynchronize();
|
Wait for Stream |
cudaStreamSynchronize(stream);
|
Copy data |
cudaMemcpy(dst, src, sizeInBytes, cudaMemcpyDefault);
|
Async mem copy |
cudaMemcpyAsync(dst, src, sizeInBytes, cudaMemcpyDefault, stream);
|
Static device variable |
__device__ type name = value;
|
Static shared mem |
__shared__ type name[size];
|
Dynamic shared mem |
extern __shared__ type name[];
|
Constant device mem |
__constant__ type name[size];
|
Copy to constant |
cudaMemcpyToSymbol(dst, src, sizeInBytes);
|
Documentation |
|
Device-only function |
__device__ auto functionName(args) -> returnType{…}
|
Host-only function |
__host__ auto functionName(args) -> returnType{…}
|
Host and device function |
__device__ __host__ auto functionName(args) -> returnType{…}
|
|
|
Thrust
Universal Vector |
thrust::universal_vector<type> name(size);
|
|