nnlib
GPU-accelerated, C/C++ neural network library.
|
Source file defining tensor operations that happen on device. More...
#include "tensor_operations_on_device.cuh"
#include <cmath>
#include <exceptions/unexpected_cuda_call_exception.h>
#include <gpu/assert.cuh>
Macros | |
#define | TILE_WIDTH 16 |
Size of a tile when performing tiled matrix multiplication. | |
Functions | |
__global__ void | sumTensorKernel (const float *a, float *destination, size_t size, size_t n) |
__global__ void | fillTensorKernel (float *tensor, float value, size_t size) |
Kernel method to fill a tensor with a value. More... | |
__global__ void | fillTensorKernel (float *tensor, const float *value, size_t size) |
__global__ void | addTensorsKernel (const float *a, const float *b, float *destination, size_t size) |
Kernel method to add two tensors together. More... | |
__global__ void | addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m) |
Kernel method to perform broadcast-add operation. More... | |
__global__ void | subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size) |
Kernel method to perform tensor subtraction. More... | |
__global__ void | mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m) |
Kernel method to perform matrix-vector multiplication. More... | |
__global__ void | multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k) |
Kernel method to multiply two matrices using a tiling method. More... | |
__global__ void | multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k) |
Kernel method to perform naive matrix-matrix multiplication. More... | |
__global__ void | multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size) |
Kernel method to multiply a tensor with a constant. More... | |
__global__ void | hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size) |
Kernel method to apply the Hadamard product to two tensors. More... | |
__global__ void | divideTensorsKernel (const float *a, const float *b, float *destination, size_t size) |
Kernel method to element-wise divide one tensor by another. More... | |
__global__ void | logTensorKernel (const float *a, float *destination, size_t size) |
Kernel method to apply the natural logarithm to each element of the tensor. More... | |
__global__ void | transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m) |
Kernel method to transpose a matrix. More... | |
__global__ void | reluKernel (const float *input, float *result, size_t size) |
__global__ void | reluDerivativeKernel (const float *output, float *result, size_t size) |
__global__ void | sigmoidKernel (float *input, float *result, size_t size) |
void | sumTensorOnDevice (const Tensor &tensor, Tensor &destination) |
void | fillTensorOnDevice (Tensor &tensor, float value) |
Fill a tensor with a constant value. More... | |
void | fillTensorOnDevice (Tensor &tensor, const Tensor &value) |
void | addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination) |
Element-wise add two tensors. More... | |
void | subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination) |
Subtract one tensor from another. More... | |
void | hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination) |
Perform the Hadamard product (element-wise multiplication) between two tensors. More... | |
void | divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination) |
Divide one tensor by another. More... | |
void | logTensorOnDevice (const Tensor &a, Tensor &destination) |
Apply natural logarithm to each element of the tensor. More... | |
void | addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination) |
Perform the broadcast-add operation. More... | |
void | multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination) |
Multiply a tensor with a constant. More... | |
void | multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination) |
Multiply a matrix with a vector. More... | |
void | multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination) |
Multiply a matrix with a matrix. More... | |
void | transposeMatrixOnDevice (const Tensor &matrix, Tensor &destination) |
Transpose a matrix. More... | |
void | reluTensorOnDevice (const Tensor &tensor, Tensor &destination) |
void | reluDerivativeTensorOnDevice (const Tensor &tensor, Tensor &destination) |
void | sigmoidTensorOnDevice (const Tensor &tensor, Tensor &destination) |
Source file defining tensor operations that happen on device.
__global__ void addBroadcastKernel | ( | const float * | matrix, |
const float * | vector, | ||
float * | destination, | ||
size_t | n, | ||
size_t | m | ||
) |
Kernel method to perform broadcast-add operation.
matrix | The data of the matrix. |
vector | The data of the vector to broadcast. |
destination | Where the result of the operation should be stored. |
n | The number of rows of the matrix. |
m | The number of columns of the matrix. Same as the size of the vector. |
Perform the broadcast-add operation.
matrix | The matrix tensor. |
vector | The vector tensor. |
destination | Where the result of the addition should be stored. |
__global__ void addTensorsKernel | ( | const float * | a, |
const float * | b, | ||
float * | destination, | ||
size_t | size | ||
) |
Kernel method to add two tensors together.
a | The data of the first tensor. |
b | The data of the second tensor. |
destination | Where the result of the operation should be stored. |
size | The size of the tensors. |
Element-wise add two tensors.
a | The first tensor. |
b | The second tensor. |
destination | Where the result of the addition should be stored. |
__global__ void divideTensorsKernel | ( | const float * | a, |
const float * | b, | ||
float * | destination, | ||
size_t | size | ||
) |
Kernel method to element-wise divide one tensor by another.
a | The data of the first tensor. |
b | The data of the second tensor. |
destination | Where the result of the division should be stored. |
size | The size of the tensors. |
Divide one tensor by another.
a | The tensor to divide. |
b | The tensor to divide by. |
destination | Where the result of the operation should be stored. |
__global__ void fillTensorKernel | ( | float * | tensor, |
float | value, | ||
size_t | size | ||
) |
Kernel method to fill a tensor with a value.
tensor | The tensor to fill. |
value | The value to fill the tensor with. |
size | The size of the tensor. |
void fillTensorOnDevice | ( | Tensor & | tensor, |
float | value | ||
) |
Fill a tensor with a constant value.
tensor | The tensor to fill. |
value | The value to fill the tensor with. |
__global__ void hadamardTensorsKernel | ( | const float * | a, |
const float * | b, | ||
float * | destination, | ||
size_t | size | ||
) |
Kernel method to apply the Hadamard product to two tensors.
a | The data of the first tensor. |
b | The data of the second tensor. |
destination | Where the result of the Hadamard product should be stored. |
size | The size of the tensors. |
Perform the Hadamard product (element-wise multiplication) between two tensors.
a | The first tensor. |
b | The second tensor. |
destination | Where the result of the operation should be stored. |
__global__ void logTensorKernel | ( | const float * | a, |
float * | destination, | ||
size_t | size | ||
) |
Kernel method to apply the natural logarithm to each element of the tensor.
a | The data of the tensor to apply natural logarithm to. |
destination | Where the result of the natural logarithm should be stored. |
size | The size of the tensor. |
Apply natural logarithm to each element of the tensor.
a | The tensor to apply natural logarithm to. |
destination | Where the result of the operation should be stored. |
__global__ void mulMatrixVectorKernel | ( | const float * | matrix, |
const float * | vector, | ||
float * | destination, | ||
size_t | n, | ||
size_t | m | ||
) |
Kernel method to perform matrix-vector multiplication.
matrix | The data of the matrix to multiply. |
vector | The data of the vector to multiply. |
destination | Where the result of the operation should be stored. |
n | The number of rows of the matrix. |
m | The number of columns of the matrix. Same as the size of the vector. |
__global__ void multiplyMatricesNoTilingKernel | ( | const float * | m1, |
const float * | m2, | ||
float * | destination, | ||
size_t | n, | ||
size_t | m, | ||
size_t | k | ||
) |
Kernel method to perform naive matrix-matrix multiplication.
m1 | The data of the first matrix. |
m2 | The data of the second matrix. |
destination | Where the result of the operation should be stored. |
n | The number of rows of the first matrix. |
m | The number of columns of the first matrix. |
k | The number of columns of the second matrix. |
__global__ void multiplyMatricesTilingKernel | ( | const float * | m1, |
const float * | m2, | ||
float * | destination, | ||
size_t | n, | ||
size_t | m, | ||
size_t | k | ||
) |
Kernel method to multiply two matrices using a tiling method.
This method is currently not used to perform matrix-matrix multiplication, for performance reasons.
m1 | The data of the first matrix. |
m2 | The data of the second matrix. |
destination | Where the result of the operation should be stored. |
n | The number of rows of the first matrix. |
m | The number of columns of the first matrix. |
k | The number of columns of the second matrix. |
Multiply a matrix with a matrix.
m1 | The first matrix tensor. |
m2 | The second matrix tensor. |
destination | Where the result of the multiplication should be stored. |
void multiplyMatrixVectorOnDevice | ( | const Tensor & | matrix, |
const Tensor & | vector, | ||
Tensor & | destination | ||
) |
Multiply a matrix with a vector.
matrix | The matrix tensor. |
vector | The vector tensor. |
destination | Where the result of the multiplication should be stored. |
__global__ void multiplyTensorKernel | ( | const float * | tensor, |
float | constant, | ||
float * | destination, | ||
size_t | size | ||
) |
Kernel method to multiply a tensor with a constant.
tensor | The data of the tensor to multiply. |
constant | The constant to multiply tensor with. |
destination | Where the result of the operation should be stored. |
size | The size of the tensor. |
Multiply a tensor with a constant.
tensor | The tensor to multiply. |
constant | The constant to multiply with. |
destination | Where the result of the multiplication should be stored. |
__global__ void subtractTensorsKernel | ( | const float * | a, |
const float * | b, | ||
float * | destination, | ||
size_t | size | ||
) |
Kernel method to perform tensor subtraction.
a | The data of the tensor to subtract from. |
b | The data of the tensor to be subtracted. |
destination | Where the result of the operation should be stored. |
size | The size of the tensors. |
Subtract one tensor from another.
a | The tensor to subtract from. |
b | The tensor to be subtracted. |
destination | Where the result of the subtraction should be stored. |
__global__ void transposeMatrixKernel | ( | const float * | matrix, |
float * | destination, | ||
size_t | n, | ||
size_t | m | ||
) |
Kernel method to transpose a matrix.
matrix | The data of the matrix to transpose. |
destination | Where the result of the transpose operation should be stored. |
n | The number of rows of the matrix. |
m | The number of columns of the matrix. |