nnlib
GPU-accelerated, C/C++ neural network library.
Source file defining the tensor operations that run on the device (GPU).
#include "tensor_operations_on_device.cuh"#include <cmath>#include <exceptions/unexpected_cuda_call_exception.h>#include <gpu/assert.cuh>Macros | |
- #define TILE_WIDTH 16
  Size of a tile when performing tiled matrix multiplication.

Functions

- __global__ void sumTensorKernel (const float *a, float *destination, size_t size, size_t n)
- __global__ void fillTensorKernel (float *tensor, float value, size_t size)
  Kernel method to fill a tensor with a value.
- __global__ void fillTensorKernel (float *tensor, const float *value, size_t size)
- __global__ void addTensorsKernel (const float *a, const float *b, float *destination, size_t size)
  Kernel method to add two tensors together.
- __global__ void addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
  Kernel method to perform the broadcast-add operation.
- __global__ void subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size)
  Kernel method to perform tensor subtraction.
- __global__ void mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
  Kernel method to perform matrix-vector multiplication.
- __global__ void multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
  Kernel method to multiply two matrices using a tiling method.
- __global__ void multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
  Kernel method to perform naive matrix-matrix multiplication.
- __global__ void multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size)
  Kernel method to multiply a tensor by a constant.
- __global__ void hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size)
  Kernel method to apply the Hadamard product to two tensors.
- __global__ void divideTensorsKernel (const float *a, const float *b, float *destination, size_t size)
  Kernel method to element-wise divide one tensor by another.
- __global__ void logTensorKernel (const float *a, float *destination, size_t size)
  Kernel method to apply the natural logarithm to each element of the tensor.
- __global__ void transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m)
  Kernel method to transpose a matrix.
- __global__ void reluKernel (const float *input, float *result, size_t size)
- __global__ void reluDerivativeKernel (const float *output, float *result, size_t size)
- __global__ void sigmoidKernel (float *input, float *result, size_t size)
- void sumTensorOnDevice (const Tensor &tensor, Tensor &destination)
- void fillTensorOnDevice (Tensor &tensor, float value)
  Fill a tensor with a constant value.
- void fillTensorOnDevice (Tensor &tensor, const Tensor &value)
- void addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
  Element-wise add two tensors.
- void subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
  Subtract one tensor from another.
- void hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
  Perform the Hadamard product (element-wise multiplication) between two tensors.
- void divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
  Divide one tensor by another.
- void logTensorOnDevice (const Tensor &a, Tensor &destination)
  Apply the natural logarithm to each element of the tensor.
- void addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
  Perform the broadcast-add operation.
- void multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination)
  Multiply a tensor by a constant.
- void multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
  Multiply a matrix by a vector.
- void multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination)
  Multiply a matrix by a matrix.
- void transposeMatrixOnDevice (const Tensor &matrix, Tensor &destination)
  Transpose a matrix.
- void reluTensorOnDevice (const Tensor &tensor, Tensor &destination)
- void reluDerivativeTensorOnDevice (const Tensor &tensor, Tensor &destination)
- void sigmoidTensorOnDevice (const Tensor &tensor, Tensor &destination)

Detailed documentation for the kernels and host-side functions listed above follows.

__global__ void addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)

Kernel method to perform the broadcast-add operation.

Parameters:
- matrix: The data of the matrix.
- vector: The data of the vector to broadcast.
- destination: Where the result of the operation should be stored.
- n: The number of rows of the matrix.
- m: The number of columns of the matrix. Same as the size of the vector.

void addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)

Perform the broadcast-add operation.

Parameters:
- matrix: The matrix tensor.
- vector: The vector tensor.
- destination: Where the result of the addition should be stored.
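
The documented parameters suggest a two-dimensional thread mapping over the matrix. Below is a minimal, self-contained sketch of how a kernel with this signature could be written and launched; it assumes row-major storage and one thread per matrix element, and the names addBroadcastSketch and launchBroadcastAdd are illustrative placeholders, not part of nnlib.

```cpp
#include <cuda_runtime.h>

// Hypothetical broadcast-add kernel: adds vector[col] to every element of
// column col of a row-major n x m matrix.
__global__ void addBroadcastSketch(const float *matrix, const float *vector,
                                   float *destination, size_t n, size_t m) {
    size_t row = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;
    size_t col = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (row < n && col < m) {
        destination[row * m + col] = matrix[row * m + col] + vector[col];
    }
}

// Example launch: 16x16 thread blocks covering the whole matrix.
void launchBroadcastAdd(const float *dMatrix, const float *dVector,
                        float *dDestination, size_t n, size_t m) {
    dim3 block(16, 16);
    dim3 grid(static_cast<unsigned int>((m + block.x - 1) / block.x),
              static_cast<unsigned int>((n + block.y - 1) / block.y));
    addBroadcastSketch<<<grid, block>>>(dMatrix, dVector, dDestination, n, m);
}
```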

__global__ void addTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to add two tensors together.

Parameters:
- a: The data of the first tensor.
- b: The data of the second tensor.
- destination: Where the result of the operation should be stored.
- size: The size of the tensors.

void addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Element-wise add two tensors.

Parameters:
- a: The first tensor.
- b: The second tensor.
- destination: Where the result of the addition should be stored.
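
addTensorsKernel, subtractTensorsKernel, hadamardTensorsKernel and divideTensorsKernel all share this parameter layout (two inputs, a destination, and an element count), so one sketch covers the general element-wise pattern. The kernel and launcher names below are hypothetical, and the 256-thread block size is only an example launch configuration, not necessarily the one nnlib uses.

```cpp
#include <cuda_runtime.h>

// Illustrative element-wise addition kernel: one thread per element.
// The bounds check guards the final, partially filled block.
__global__ void addTensorsSketch(const float *a, const float *b,
                                 float *destination, size_t size) {
    size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (i < size) {
        destination[i] = a[i] + b[i];
    }
}

// Example launch: 256 threads per block, enough blocks to cover `size` elements.
void launchAddTensors(const float *dA, const float *dB, float *dDest, size_t size) {
    const unsigned int threads = 256;
    const unsigned int blocks = static_cast<unsigned int>((size + threads - 1) / threads);
    addTensorsSketch<<<blocks, threads>>>(dA, dB, dDest, size);
}
```

Subtraction, Hadamard product, and division differ only in the operator applied inside the bounds check.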

__global__ void divideTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to element-wise divide one tensor by another.

Parameters:
- a: The data of the first tensor.
- b: The data of the second tensor.
- destination: Where the result of the division should be stored.
- size: The size of the tensors.

void divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Divide one tensor by another.

Parameters:
- a: The tensor to divide.
- b: The tensor to divide by.
- destination: Where the result of the operation should be stored.

__global__ void fillTensorKernel (float *tensor, float value, size_t size)

Kernel method to fill a tensor with a value.

Parameters:
- tensor: The tensor to fill.
- value: The value to fill the tensor with.
- size: The size of the tensor.

void fillTensorOnDevice (Tensor &tensor, float value)

Fill a tensor with a constant value.

Parameters:
- tensor: The tensor to fill.
- value: The value to fill the tensor with.
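
One common way to write a fill kernel with this signature is a grid-stride loop, which lets any grid size cover the whole tensor. The sketch below is illustrative only and is not necessarily how nnlib's fillTensorKernel is implemented.

```cpp
// Illustrative fill kernel using a grid-stride loop: each thread writes
// every `stride`-th element, so any launch geometry covers all `size` elements.
__global__ void fillTensorSketch(float *tensor, float value, size_t size) {
    size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < size; i += stride) {
        tensor[i] = value;
    }
}
```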

__global__ void hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to apply the Hadamard product to two tensors.

Parameters:
- a: The data of the first tensor.
- b: The data of the second tensor.
- destination: Where the result of the Hadamard operation should be stored.
- size: The size of the tensors.

void hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Perform the Hadamard product (element-wise multiplication) between two tensors.

Parameters:
- a: The first tensor.
- b: The second tensor.
- destination: Where the result of the operation should be stored.

__global__ void logTensorKernel (const float *a, float *destination, size_t size)

Kernel method to apply the natural logarithm to each element of the tensor.

Parameters:
- a: The data of the tensor to apply the natural logarithm to.
- destination: Where the result of the natural logarithm should be stored.
- size: The size of the tensor.

void logTensorOnDevice (const Tensor &a, Tensor &destination)

Apply the natural logarithm to each element of the tensor.

Parameters:
- a: The tensor to apply the natural logarithm to.
- destination: Where the result of the operation should be stored.
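
A unary kernel with this signature typically just applies the corresponding CUDA device math function to each element. The sketch below uses logf and a hypothetical name; it is an illustration, not nnlib's actual kernel.

```cpp
// Illustrative natural-logarithm kernel: applies logf to each element.
__global__ void logTensorSketch(const float *a, float *destination, size_t size) {
    size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (i < size) {
        destination[i] = logf(a[i]);
    }
}
```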

__global__ void mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)

Kernel method to perform matrix-vector multiplication.

Parameters:
- matrix: The data of the matrix to multiply.
- vector: The data of the vector to multiply.
- destination: Where the result of the operation should be stored.
- n: The number of rows of the matrix.
- m: The number of columns of the matrix. Same as the size of the vector.
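
Given the documented dimensions (an n x m matrix and a length-m vector producing a length-n result), a straightforward mapping is one thread per output row. The sketch below assumes row-major storage and uses a hypothetical name; nnlib's kernel may be organized differently.

```cpp
// Illustrative matrix-vector multiply: thread `row` computes the dot product of
// row `row` of the (row-major) n x m matrix with the length-m vector.
__global__ void mulMatrixVectorSketch(const float *matrix, const float *vector,
                                      float *destination, size_t n, size_t m) {
    size_t row = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (row < n) {
        float sum = 0.0f;
        for (size_t col = 0; col < m; ++col) {
            sum += matrix[row * m + col] * vector[col];
        }
        destination[row] = sum;
    }
}
```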

__global__ void multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)

Kernel method to perform naive matrix-matrix multiplication.

Parameters:
- m1: The data of the first matrix.
- m2: The data of the second matrix.
- destination: Where the result of the operation should be stored.
- n: The number of rows of the first matrix.
- m: The number of columns of the first matrix.
- k: The number of columns of the second matrix.
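
With n, m and k as documented, the naive approach assigns one thread to each element of the n x k result and loops over the shared dimension m. The following sketch assumes row-major storage and is illustrative only.

```cpp
// Illustrative naive matrix multiply: one thread per output element of the
// n x k result, iterating over the shared dimension m (row-major storage).
__global__ void matMulNaiveSketch(const float *m1, const float *m2,
                                  float *destination, size_t n, size_t m, size_t k) {
    size_t row = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;
    size_t col = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (row < n && col < k) {
        float sum = 0.0f;
        for (size_t i = 0; i < m; ++i) {
            sum += m1[row * m + i] * m2[i * k + col];
        }
        destination[row * k + col] = sum;
    }
}
```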

__global__ void multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)

Kernel method to multiply two matrices using a tiling method.

This kernel is currently not used for matrix-matrix multiplication, for performance reasons.

Parameters:
- m1: The data of the first matrix.
- m2: The data of the second matrix.
- destination: Where the result of the operation should be stored.
- n: The number of rows of the first matrix.
- m: The number of columns of the first matrix.
- k: The number of columns of the second matrix.
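
To make the tiling method concrete, here is a self-contained sketch of a shared-memory tiled multiply using a TILE_WIDTH x TILE_WIDTH tile, matching the macro defined above. It assumes row-major storage and zero-pads edge tiles when the matrix dimensions are not multiples of TILE_WIDTH; it illustrates the technique and is not nnlib's exact kernel.

```cpp
#define TILE_WIDTH 16

// Illustrative shared-memory tiled multiply of an n x m matrix (m1) by an
// m x k matrix (m2), producing an n x k result. Each block computes one
// TILE_WIDTH x TILE_WIDTH tile of the output, staging matching tiles of the
// inputs in shared memory so each global element is read once per tile.
__global__ void matMulTiledSketch(const float *m1, const float *m2,
                                  float *destination, size_t n, size_t m, size_t k) {
    __shared__ float tileA[TILE_WIDTH][TILE_WIDTH];
    __shared__ float tileB[TILE_WIDTH][TILE_WIDTH];

    size_t row = static_cast<size_t>(blockIdx.y) * TILE_WIDTH + threadIdx.y;
    size_t col = static_cast<size_t>(blockIdx.x) * TILE_WIDTH + threadIdx.x;
    float sum = 0.0f;

    size_t numTiles = (m + TILE_WIDTH - 1) / TILE_WIDTH;
    for (size_t t = 0; t < numTiles; ++t) {
        size_t aCol = t * TILE_WIDTH + threadIdx.x;
        size_t bRow = t * TILE_WIDTH + threadIdx.y;

        // Load one element of each input tile, padding with zeros at the edges.
        tileA[threadIdx.y][threadIdx.x] = (row < n && aCol < m) ? m1[row * m + aCol] : 0.0f;
        tileB[threadIdx.y][threadIdx.x] = (bRow < m && col < k) ? m2[bRow * k + col] : 0.0f;
        __syncthreads();

        for (int i = 0; i < TILE_WIDTH; ++i) {
            sum += tileA[threadIdx.y][i] * tileB[i][threadIdx.x];
        }
        __syncthreads();
    }

    if (row < n && col < k) {
        destination[row * k + col] = sum;
    }
}
```

The usual motivation for tiling is that each input element is fetched from global memory once per tile rather than once per output element; as noted above, nnlib currently favours the non-tiled kernel for performance reasons.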

void multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination)

Multiply a matrix by a matrix.

Parameters:
- m1: The first matrix tensor.
- m2: The second matrix tensor.
- destination: Where the result of the multiplication should be stored.

void multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)

Multiply a matrix by a vector.

Parameters:
- matrix: The matrix tensor.
- vector: The vector tensor.
- destination: Where the result of the multiplication should be stored.

__global__ void multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size)

Kernel method to multiply a tensor by a constant.

Parameters:
- tensor: The data of the tensor to multiply.
- constant: The constant to multiply the tensor by.
- destination: Where the result of the operation should be stored.
- size: The size of the tensor.

void multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination)

Multiply a tensor by a constant.

Parameters:
- tensor: The tensor to multiply.
- constant: The constant to multiply by.
- destination: Where the result of the multiplication should be stored.

__global__ void subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size)

Kernel method to perform tensor subtraction.

Parameters:
- a: The data of the tensor to subtract from.
- b: The data of the tensor to be subtracted.
- destination: Where the result of the operation should be stored.
- size: The size of the tensors.

void subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)

Subtract one tensor from another.

Parameters:
- a: The tensor to subtract from.
- b: The tensor to be subtracted.
- destination: Where the result of the subtraction should be stored.

__global__ void transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m)

Kernel method to transpose a matrix.

Parameters:
- matrix: The data of the matrix to transpose.
- destination: Where the result of the transpose operation should be stored.
- n: The number of rows of the matrix.
- m: The number of columns of the matrix.
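
A direct implementation of this signature maps one thread to each input element and writes it to the transposed position. The sketch below assumes row-major storage and a hypothetical name; a shared-memory tiled transpose is a common refinement that keeps global memory accesses coalesced, but the simple version shows the index mapping.

```cpp
// Illustrative transpose: element (row, col) of the n x m input becomes
// element (col, row) of the m x n output (row-major storage assumed).
__global__ void transposeMatrixSketch(const float *matrix, float *destination,
                                      size_t n, size_t m) {
    size_t row = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;
    size_t col = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (row < n && col < m) {
        destination[col * n + row] = matrix[row * m + col];
    }
}
```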