Dayalan Saravanan


CUDA Thread Indexing Reference

Wednesday, November 27, 2024

1D grid of 1D blocks


    // Flat global thread index for a 1D grid of 1D blocks.
    //
    // Returns the number of threads in all blocks before this one
    // (blockIdx.x * blockDim.x) plus this thread's x position in its block.
    __device__ int threadId_1D_1D() {
      const unsigned int blockBase = blockIdx.x * blockDim.x;
      return blockBase + threadIdx.x;
    }
    

1D grid of 2D blocks


    // Flat global thread index for a 1D grid of 2D blocks.
    //
    // Threads are numbered block by block; inside a block x varies fastest,
    // then y. Returns the resulting index as an int.
    __device__ int threadId_1D_2D() {

      int threadId = blockIdx.x * blockDim.x * blockDim.y  // threads in preceding blocks
                    + threadIdx.y * blockDim.x             // full rows before this row
                    + threadIdx.x;                         // position within the row

      return threadId;
    }
    

1D grid of 3D blocks


    // Flat global thread index for a 1D grid of 3D blocks.
    //
    // Inside a block x varies fastest, then y, then z; blocks are laid out
    // one after another along the grid's x axis.
    __device__ int threadId_1D_3D() {
      // Number of threads in one block.
      const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
      // Threads in the z-slices that precede this thread's slice.
      const unsigned int sliceOffset = threadIdx.z * blockDim.y * blockDim.x;
      // Threads in the rows that precede this thread's row within the slice.
      const unsigned int rowOffset = threadIdx.y * blockDim.x;

      return blockIdx.x * threadsPerBlock + sliceOffset + rowOffset + threadIdx.x;
    }
    

2D grid of 1D blocks


    // Flat global thread index for a 2D grid of 1D blocks.
    //
    // Blocks are linearised row by row (x fastest, then y); the thread's x
    // position is then added to the start of its block.
    __device__ int threadId_2D_1D() {
      // Linear index of this block within the grid.
      const int flatBlock = blockIdx.y * gridDim.x + blockIdx.x;
      return flatBlock * blockDim.x + threadIdx.x;
    }
    

2D grid of 2D blocks


    // Flat global thread index for a 2D grid of 2D blocks.
    //
    // Blocks are linearised row by row across the grid (x fastest, then y);
    // inside a block threads are linearised the same way.
    __device__ int threadId_2D_2D() {

      // Linear index of this block within the grid.
      int blockId = blockIdx.x + blockIdx.y * gridDim.x;

      int threadId = blockId * (blockDim.x * blockDim.y)   // threads in preceding blocks
                    + (threadIdx.y * blockDim.x)           // full rows before this row
                    + threadIdx.x;                         // position within the row

      return threadId;
    }
    

2D grid of 3D blocks


    // Flat global thread index for a 2D grid of 3D blocks.
    //
    // Blocks are linearised row by row across the grid (x fastest, then y);
    // inside a block x varies fastest, then y, then z.
    __device__ int threadId_2D_3D() {

      // Linear index of this block within the grid.
      int blockId = blockIdx.x + blockIdx.y * gridDim.x;

      int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)  // threads in preceding blocks
                    + (threadIdx.z * (blockDim.x * blockDim.y))        // full z-slices before this one
                    + (threadIdx.y * blockDim.x)                       // full rows before this row
                    + threadIdx.x;                                     // position within the row

      return threadId;
    }
    

3D grid of 1D blocks


    // Flat global thread index for a 3D grid of 1D blocks.
    //
    // Blocks are linearised with x fastest, then y, then z; the thread's x
    // position is then added to the start of its block.
    __device__ int threadId_3D_1D() {
      // Blocks contained in one z-plane of the grid.
      const unsigned int blocksPerPlane = gridDim.x * gridDim.y;
      // Linear index of this block within the grid.
      const int flatBlock = blocksPerPlane * blockIdx.z
                          + blockIdx.y * gridDim.x
                          + blockIdx.x;

      return flatBlock * blockDim.x + threadIdx.x;
    }
    

3D grid of 2D blocks


    // Flat global thread index for a 3D grid of 2D blocks.
    //
    // Blocks are linearised with x fastest, then y, then z; inside a block
    // threads are linearised with x fastest, then y.
    __device__ int threadId_3D_2D() {
      // Blocks contained in one z-plane of the grid.
      const unsigned int blocksPerPlane = gridDim.x * gridDim.y;
      // Linear index of this block within the grid.
      const int flatBlock = blocksPerPlane * blockIdx.z
                          + blockIdx.y * gridDim.x
                          + blockIdx.x;
      // Number of threads in one block.
      const unsigned int threadsPerBlock = blockDim.x * blockDim.y;
      // This thread's linear position inside its own block.
      const unsigned int inBlock = threadIdx.y * blockDim.x + threadIdx.x;

      return flatBlock * threadsPerBlock + inBlock;
    }
    

3D grid of 3D blocks


    // Flat global thread index for a 3D grid of 3D blocks.
    //
    // Blocks are linearised with x fastest, then y, then z; inside a block
    // threads are linearised in the same order.
    __device__ int threadId_3D_3D() {

      // Linear index of this block within the grid.
      int blockId = blockIdx.x + blockIdx.y * gridDim.x
                   + gridDim.x * gridDim.y * blockIdx.z;

      int threadId = blockId * (blockDim.x * blockDim.y * blockDim.z)  // threads in preceding blocks
                    + (threadIdx.z * (blockDim.x * blockDim.y))        // full z-slices before this one
                    + (threadIdx.y * blockDim.x)                       // full rows before this row
                    + threadIdx.x;                                     // position within the row

      return threadId;
    }