nnlib
GPU-accelerated, C/C++ neural network library.
|
Source file defining tensor operations that happen on host. More...
Macros | |
#define | SET_ROW_TO_ZERO(index) __m256 v##index = _mm256_setzero_ps() |
Set a row of an 8x8 tile to 0. | |
#define | SET_ALL_ROWS_TO_ZERO() |
Set all rows of an 8x8 tile to 0. More... | |
#define | COMPUTE_ROW(index) |
Compute a single row of an 8x8 tile. More... | |
#define | COMPUTE_ALL_ROWS() |
Compute all rows of an 8x8 tile. More... | |
#define | STORE_ROW(index) _mm256_storeu_ps(destination.data + (row * 8 + (index)) * m + column * 8, v##index) |
Store a row of an 8x8 tile to the result matrix. | |
#define | STORE_ALL_ROWS() |
Store all rows of an 8x8 tile to the result matrix. More... | |
Functions | |
float | horizontalAdd (__m256 value) |
Perform a horizontal add of a __m256 value. More... | |
void | sumTensorOnHost (const Tensor &tensor, Tensor &destination) |
Sum all values of a tensor. More... | |
void | fillTensorOnHost (Tensor &tensor, float value) |
Fill a tensor with a constant value. More... | |
void | fillTensorOnHost (Tensor &tensor, const Tensor &value) |
void | addTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination) |
Element-wise add two tensors. More... | |
void | subtractTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination) |
Subtract one tensor from another. More... | |
void | hadamardTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination) |
Perform hadamard product (element-wise multiplication) between two tensors. More... | |
void | divideTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination) |
Divide one tensor by another. More... | |
void | logTensorOnHost (const Tensor &a, Tensor &destination) |
Apply natural logarithm to each element of the tensor. More... | |
void | addBroadcastOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination) |
Perform the broadcast-add operation. More... | |
void | multiplyTensorOnHost (const Tensor &tensor, float constant, Tensor &destination) |
Multiply a tensor with a constant. More... | |
void | multiplyMatrixVectorOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination) |
Multiply a matrix with a vector. More... | |
void | naiveMatMul (const Tensor &m1, const Tensor &m2, Tensor &destination, size_t rowStart=0, size_t columnStart=0) |
Naive matrix-matrix multiplication. More... | |
void | multiplyMatrixMatrixOnHost (const Tensor &m1, const Tensor &m2, Tensor &destination) |
Multiply a matrix with a matrix. More... | |
void | transposeMatrixOnHost (const Tensor &matrix, Tensor &destination) |
Transpose a matrix. More... | |
void | reluTensorOnHost (const Tensor &tensor, Tensor &destination) |
void | reluDerivativeTensorOnHost (const Tensor &tensor, Tensor &destination) |
void | sigmoidTensorOnHost (const Tensor &tensor, Tensor &destination) |
Source file defining tensor operations that happen on host.
#define COMPUTE_ALL_ROWS | ( | ) |
Compute all rows of an 8x8 tile.
#define COMPUTE_ROW | ( | index | ) |
Compute a single row of an 8x8 tile.
#define SET_ALL_ROWS_TO_ZERO | ( | ) |
Set all rows of an 8x8 tile to 0.
#define STORE_ALL_ROWS | ( | ) |
Store all rows of an 8x8 tile to the result matrix.
Perform the broadcast-add operation.
matrix | The matrix tensor. |
vector | The vector tensor. |
destination | Where the result of the addition should be stored. |
Element-wise add two tensors.
a | The first tensor. |
b | The second tensor. |
destination | Where the result of the addition should be stored. |
Divide one tensor by another.
a | The tensor to divide. |
b | The tensor to divide by. |
destination | Where the result of the operation should be stored. |
void fillTensorOnHost | ( | Tensor & | tensor, |
float | value | ||
) |
Fill a tensor with a constant value.
tensor | The tensor to fill. |
value | The value to fill the tensor with. |
Perform hadamard product (element-wise multiplication) between two tensors.
a | The first tensor. |
b | The second tensor. |
destination | Where the result of the operation should be stored. |
float horizontalAdd | ( | __m256 | value | ) |
Perform a horizontal add of a `__m256` value.
This adds all 8 floats in such a value together.
value | The __m256 variable whose floats should be summed up. |
Apply natural logarithm to each element of the tensor.
a | The tensor to apply natural logarithm to. |
destination | Where the result of the operation should be stored. |
Multiply a matrix with a matrix.
m1 | The first matrix tensor. |
m2 | The second matrix tensor. |
destination | Where the result of the multiplication should be stored. |
void multiplyMatrixVectorOnHost | ( | const Tensor & | matrix, |
const Tensor & | vector, | ||
Tensor & | destination | ||
) |
Multiply a matrix with a vector.
matrix | The matrix tensor. |
vector | The vector tensor. |
destination | Where the result of the multiplication should be stored. |
Multiply a tensor with a constant.
tensor | The tensor to multiply. |
constant | The constant to multiply with. |
destination | Where the result of the multiplication should be stored. |
void naiveMatMul | ( | const Tensor & | m1, |
const Tensor & | m2, | ||
Tensor & | destination, | ||
size_t | rowStart = 0, | ||
size_t | columnStart = 0 | ||
) |
Naive matrix-matrix multiplication.
The function takes additional `rowStart` and `columnStart` arguments. These are used for edge cases in the SIMD implementation of matrix-matrix multiplication. They respectively signify the row and the column at which to start the computation.
m1 | The first operand matrix of the multiplication. |
m2 | The second operand matrix of the multiplication. |
destination | Where the result of the multiplication should be stored. |
rowStart | The row at which to start the computation. |
columnStart | The column at which to start the computation. |
Subtract one tensor from another.
a | The tensor to subtract from. |
b | The tensor to be subtracted. |
destination | Where the result of the subtraction should be stored. |
Sum all values of a tensor.
tensor | The tensor to sum. |