nnlib
GPU-accelerated, C/C++ neural network library.
tensor_operations_on_device.cu File Reference

Source file defining tensor operations executed on the device. More...

#include "tensor_operations_on_device.cuh"
#include <cmath>
#include <exceptions/unexpected_cuda_call_exception.h>
#include <gpu/assert.cuh>

Macros

#define TILE_WIDTH   16
 Size of a tile when performing tiled matrix multiplication.
 

Functions

__global__ void sumTensorKernel (const float *a, float *destination, size_t size, size_t n)
 Kernel method to compute the sum of the elements of a tensor.
 
__global__ void fillTensorKernel (float *tensor, float value, size_t size)
 Kernel method to fill a tensor with a value. More...
 
__global__ void fillTensorKernel (float *tensor, const float *value, size_t size)
 
__global__ void addTensorsKernel (const float *a, const float *b, float *destination, size_t size)
 Kernel method to add two tensors together. More...
 
__global__ void addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
 Kernel method to perform broadcast-add operation. More...
 
__global__ void subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size)
 Kernel method to perform tensor subtraction. More...
 
__global__ void mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
 Kernel method to perform matrix-vector multiplication. More...
 
__global__ void multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
 Kernel method to multiply two matrices using a tiling method. More...
 
__global__ void multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
 Kernel method to perform naive matrix-matrix multiplication. More...
 
__global__ void multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size)
 Kernel method to multiply a tensor with a constant. More...
 
__global__ void hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size)
 Kernel method to apply the Hadamard product to two tensors. More...
 
__global__ void divideTensorsKernel (const float *a, const float *b, float *destination, size_t size)
 Kernel method to element-wise divide one tensor by another. More...
 
__global__ void logTensorKernel (const float *a, float *destination, size_t size)
 Kernel method to apply the natural logarithm to each element of the tensor. More...
 
__global__ void transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m)
 Kernel method to transpose a matrix. More...
 
__global__ void reluKernel (const float *input, float *result, size_t size)
 Kernel method to apply the ReLU activation function to a tensor.
 
__global__ void reluDerivativeKernel (const float *output, float *result, size_t size)
 Kernel method to compute the derivative of the ReLU activation function.
 
__global__ void sigmoidKernel (float *input, float *result, size_t size)
 Kernel method to apply the sigmoid activation function to a tensor.
 
void sumTensorOnDevice (const Tensor &tensor, Tensor &destination)
 Compute the sum of the elements of a tensor.
 
void fillTensorOnDevice (Tensor &tensor, float value)
 Fill a tensor with a constant value. More...
 
void fillTensorOnDevice (Tensor &tensor, const Tensor &value)
 
void addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
 Element-wise add two tensors. More...
 
void subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
 Subtract one tensor from another. More...
 
void hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
 Perform the Hadamard product (element-wise multiplication) between two tensors. More...
 
void divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
 Divide one tensor by another. More...
 
void logTensorOnDevice (const Tensor &a, Tensor &destination)
 Apply natural logarithm to each element of the tensor. More...
 
void addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
 Perform the broadcast-add operation. More...
 
void multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination)
 Multiply a tensor with a constant. More...
 
void multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
 Multiply a matrix with a vector. More...
 
void multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination)
 Multiply a matrix with a matrix. More...
 
void transposeMatrixOnDevice (const Tensor &matrix, Tensor &destination)
 Transpose a matrix. More...
 
void reluTensorOnDevice (const Tensor &tensor, Tensor &destination)
 Apply the ReLU activation function to a tensor.
 
void reluDerivativeTensorOnDevice (const Tensor &tensor, Tensor &destination)
 Apply the derivative of the ReLU activation function to a tensor.
 
void sigmoidTensorOnDevice (const Tensor &tensor, Tensor &destination)
 Apply the sigmoid activation function to a tensor.
 

Detailed Description

Source file defining tensor operations executed on the device.

Author
Jan Warchocki
Date
29 August 2022

Function Documentation

◆ addBroadcastKernel()

__global__ void addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)

Kernel method to perform broadcast-add operation.

Parameters
matrix: The data of the matrix.
vector: The data of the vector to broadcast.
destination: Where the result of the operation should be stored.
n: The number of rows of the matrix.
m: The number of columns of the matrix. Same as the size of the vector.
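As an illustration of the index arithmetic involved, the following is a minimal sketch of a broadcast-add kernel, assuming row-major storage and one thread per matrix element. The Sketch-suffixed name is hypothetical; this is not the library's actual kernel.

    // Minimal sketch (not nnlib's actual kernel): one thread per element of an
    // n x m row-major matrix; the vector of length m is added to every row.
    __global__ void addBroadcastSketch(const float* matrix, const float* vector,
                                       float* destination, size_t n, size_t m) {
        size_t row = blockIdx.y * blockDim.y + threadIdx.y;
        size_t col = blockIdx.x * blockDim.x + threadIdx.x;
        if (row < n && col < m) {
            destination[row * m + col] = matrix[row * m + col] + vector[col];
        }
    }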

◆ addBroadcastOnDevice()

void addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)

Perform the broadcast-add operation.

Parameters
matrix: The matrix tensor.
vector: The vector tensor.
destination: Where the result of the addition should be stored.
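A host wrapper like this typically derives a launch configuration from the tensor shape and forwards the raw device pointers to the kernel. The sketch below shows that pattern on raw pointers and launches the illustrative addBroadcastSketch kernel from above; the 16x16 block shape, the function name, and the explicit synchronization are assumptions, not nnlib's exact behaviour.

    // Hedged sketch of a typical launch pattern for a broadcast-add kernel.
    void launchAddBroadcastSketch(const float* matrix, const float* vector,
                                  float* destination, size_t n, size_t m) {
        dim3 block(16, 16);
        dim3 grid((m + block.x - 1) / block.x, (n + block.y - 1) / block.y);
        addBroadcastSketch<<<grid, block>>>(matrix, vector, destination, n, m);
        cudaDeviceSynchronize(); // illustrative; the library may check errors differently
    }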

◆ addTensorsKernel()

__global__ void addTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to add two tensors together.

Parameters
a: The data of the first tensor.
b: The data of the second tensor.
destination: Where the result of the operation should be stored.
size: The size of the tensors.
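The element-wise kernels in this file (addition, subtraction, Hadamard product, division, logarithm, scaling) all share the same one-thread-per-element structure. A minimal sketch of that pattern, using addition, is shown below; the Sketch-suffixed name is hypothetical and the code is illustrative only.

    // Minimal sketch of the element-wise pattern: one thread per element,
    // guarded by a bounds check. Other element-wise kernels differ only in the operator.
    __global__ void addTensorsSketch(const float* a, const float* b,
                                     float* destination, size_t size) {
        size_t index = blockIdx.x * blockDim.x + threadIdx.x;
        if (index < size) {
            destination[index] = a[index] + b[index];
        }
    }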

◆ addTensorsOnDevice()

void addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Element-wise add two tensors.

Parameters
a: The first tensor.
b: The second tensor.
destination: Where the result of the addition should be stored.

◆ divideTensorsKernel()

__global__ void divideTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to element-wise divide one tensor by another.

Parameters
a: The data of the first tensor.
b: The data of the second tensor.
destination: Where the result of the division should be stored.
size: The size of the tensors.

◆ divideTensorsOnDevice()

void divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Divide one tensor by another.

Parameters
a: The tensor to divide.
b: The tensor to divide by.
destination: Where the result of the operation should be stored.

◆ fillTensorKernel()

__global__ void fillTensorKernel (float *tensor, float value, size_t size)

Kernel method to fill a tensor with a value.

Parameters
tensor: The tensor to fill.
value: The value to fill the tensor with.
size: The size of the tensor.

◆ fillTensorOnDevice()

void fillTensorOnDevice (Tensor &tensor, float value)

Fill a tensor with a constant value.

Parameters
tensor: The tensor to fill.
value: The value to fill the tensor with.

◆ hadamardTensorsKernel()

__global__ void hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to apply the Hadamard product to two tensors.

Parameters
a: The data of the first tensor.
b: The data of the second tensor.
destination: Where the result of the Hadamard operation should be stored.
size: The size of the tensors.

◆ hadamardTensorsOnDevice()

void hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Perform the Hadamard product (element-wise multiplication) between two tensors.

Parameters
a: The first tensor.
b: The second tensor.
destination: Where the result of the operation should be stored.

◆ logTensorKernel()

__global__ void logTensorKernel (const float *a, float *destination, size_t size)

Kernel method to apply the natural logarithm to each element of the tensor.

Parameters
a: The data of the tensor to apply natural logarithm to.
destination: Where the result of the natural logarithm should be stored.
size: The size of the tensor.

◆ logTensorOnDevice()

void logTensorOnDevice (const Tensor &a, Tensor &destination)

Apply natural logarithm to each element of the tensor.

Parameters
a: The tensor to apply natural logarithm to.
destination: Where the result of the operation should be stored.

◆ mulMatrixVectorKernel()

__global__ void mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)

Kernel method to perform matrix-vector multiplication.

Parameters
matrix: The data of the matrix to multiply.
vector: The data of the vector to multiply.
destination: Where the result of the operation should be stored.
n: The number of rows of the matrix.
m: The number of columns of the matrix. Same as the size of the vector.
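A common way to structure such a kernel is one thread per output row, with each thread accumulating a dot product over the columns. The sketch below illustrates that approach under a row-major storage assumption; the name is hypothetical and this is not necessarily how the library's kernel is written.

    // Illustrative sketch: one thread per row of the n x m matrix; each thread
    // computes the dot product of its row with the length-m vector.
    __global__ void mulMatrixVectorSketch(const float* matrix, const float* vector,
                                          float* destination, size_t n, size_t m) {
        size_t row = blockIdx.x * blockDim.x + threadIdx.x;
        if (row < n) {
            float acc = 0.0f;
            for (size_t col = 0; col < m; col++) {
                acc += matrix[row * m + col] * vector[col];
            }
            destination[row] = acc;
        }
    }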

◆ multiplyMatricesNoTilingKernel()

__global__ void multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)

Kernel method to perform naive matrix-matrix multiplication.

Parameters
m1: The data of the first matrix.
m2: The data of the second matrix.
destination: Where the result of the operation should be stored.
n: The number of rows of the first matrix.
m: The number of columns of the first matrix.
k: The number of columns of the second matrix.
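In the naive formulation, each thread computes one element of the n x k result and reads every operand directly from global memory. The sketch below shows that structure, assuming row-major storage; it is an illustration rather than the library's exact kernel.

    // Illustrative sketch of naive matrix multiplication: one thread per output
    // element, with all operands read directly from global memory.
    __global__ void matMulNaiveSketch(const float* m1, const float* m2, float* destination,
                                      size_t n, size_t m, size_t k) {
        size_t row = blockIdx.y * blockDim.y + threadIdx.y;
        size_t col = blockIdx.x * blockDim.x + threadIdx.x;
        if (row < n && col < k) {
            float acc = 0.0f;
            for (size_t i = 0; i < m; i++) {
                acc += m1[row * m + i] * m2[i * k + col];
            }
            destination[row * k + col] = acc;
        }
    }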

◆ multiplyMatricesTilingKernel()

__global__ void multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)

Kernel method to multiply two matrices using a tiling method.

This method is currently not used to perform matrix-matrix multiplication, for performance reasons.

Parameters
m1: The data of the first matrix.
m2: The data of the second matrix.
destination: Where the result of the operation should be stored.
n: The number of rows of the first matrix.
m: The number of columns of the first matrix.
k: The number of columns of the second matrix.
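Tiled multiplication stages TILE_WIDTH x TILE_WIDTH sub-blocks of both operands in shared memory, so each value loaded from global memory is reused TILE_WIDTH times. The following is a minimal sketch of the technique, assuming blocks of TILE_WIDTH x TILE_WIDTH threads and row-major storage; the name is hypothetical and this is not the library's exact kernel.

    // Illustrative sketch of tiled matrix multiplication using shared memory.
    // m1 is n x m, m2 is m x k, destination is n x k (all row-major).
    __global__ void matMulTiledSketch(const float* m1, const float* m2, float* destination,
                                      size_t n, size_t m, size_t k) {
        __shared__ float tileA[TILE_WIDTH][TILE_WIDTH];
        __shared__ float tileB[TILE_WIDTH][TILE_WIDTH];

        size_t row = blockIdx.y * TILE_WIDTH + threadIdx.y;
        size_t col = blockIdx.x * TILE_WIDTH + threadIdx.x;
        float acc = 0.0f;

        // Walk over the shared dimension one tile at a time.
        for (size_t t = 0; t < (m + TILE_WIDTH - 1) / TILE_WIDTH; t++) {
            size_t aCol = t * TILE_WIDTH + threadIdx.x;
            size_t bRow = t * TILE_WIDTH + threadIdx.y;
            tileA[threadIdx.y][threadIdx.x] = (row < n && aCol < m) ? m1[row * m + aCol] : 0.0f;
            tileB[threadIdx.y][threadIdx.x] = (bRow < m && col < k) ? m2[bRow * k + col] : 0.0f;
            __syncthreads();

            for (size_t i = 0; i < TILE_WIDTH; i++) {
                acc += tileA[threadIdx.y][i] * tileB[i][threadIdx.x];
            }
            __syncthreads();
        }

        if (row < n && col < k) {
            destination[row * k + col] = acc;
        }
    }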

◆ multiplyMatrixMatrixOnDevice()

void multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination)

Multiply a matrix with a matrix.

Parameters
m1: The first matrix tensor.
m2: The second matrix tensor.
destination: Where the result of the multiplication should be stored.

◆ multiplyMatrixVectorOnDevice()

void multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)

Multiply a matrix with a vector.

Parameters
matrix: The matrix tensor.
vector: The vector tensor.
destination: Where the result of the multiplication should be stored.

◆ multiplyTensorKernel()

__global__ void multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size)

Kernel method to multiply a tensor with a constant.

Parameters
tensor: The data of the tensor to multiply.
constant: The constant to multiply the tensor with.
destination: Where the result of the operation should be stored.
size: The size of the tensor.

◆ multiplyTensorOnDevice()

void multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination)

Multiply a tensor with a constant.

Parameters
tensor: The tensor to multiply.
constant: The constant to multiply with.
destination: Where the result of the multiplication should be stored.

◆ subtractTensorsKernel()

__global__ void subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to perform tensor subtraction.

Parameters
a: The data of the tensor to subtract from.
b: The data of the tensor to be subtracted.
destination: Where the result of the operation should be stored.
size: The size of the tensors.

◆ subtractTensorsOnDevice()

void subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Subtract one tensor from another.

Parameters
a: The tensor to subtract from.
b: The tensor to be subtracted.
destination: Where the result of the subtraction should be stored.

◆ transposeMatrixKernel()

__global__ void transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m)

Kernel method to transpose a matrix.

Parameters
matrix: The data of the matrix to transpose.
destination: Where the result of the transpose operation should be stored.
n: The number of rows of the matrix.
m: The number of columns of the matrix.
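A straightforward transpose kernel maps one thread to each input element and writes it to the swapped position in the output. The sketch below illustrates this, assuming row-major storage; a shared-memory variant would additionally coalesce the writes. The name is hypothetical and this is not necessarily the library's exact implementation.

    // Illustrative sketch: element (row, col) of the n x m input is written to
    // position (col, row) of the m x n output.
    __global__ void transposeSketch(const float* matrix, float* destination, size_t n, size_t m) {
        size_t row = blockIdx.y * blockDim.y + threadIdx.y;
        size_t col = blockIdx.x * blockDim.x + threadIdx.x;
        if (row < n && col < m) {
            destination[col * n + row] = matrix[row * m + col];
        }
    }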

◆ transposeMatrixOnDevice()

void transposeMatrixOnDevice (const Tensor &matrix, Tensor &destination)

Transpose a matrix.

Parameters
matrix: The matrix tensor to transpose.
destination: Where the result of the transpose operation should be stored.