nnlib
GPU-accelerated, C/C++ neural network library.
Macros | Functions
tensor_operations_on_host.cpp File Reference

Source file defining tensor operations that happen on host. More...

#include "tensor_operations_on_host.h"
#include <cmath>
#include <immintrin.h>

Macros

#define SET_ROW_TO_ZERO(index)   __m256 v##index = _mm256_setzero_ps()
 Set a row of an 8x8 tile to 0.
 
#define SET_ALL_ROWS_TO_ZERO()
 Set all rows of an 8x8 tile to 0. More...
 
#define COMPUTE_ROW(index)
 Compute a single row of an 8x8 tile. More...
 
#define COMPUTE_ALL_ROWS()
 Compute all rows of an 8x8 tile. More...
 
#define STORE_ROW(index)   _mm256_storeu_ps(destination.data + (row * 8 + (index)) * m + column * 8, v##index)
 Store a row of 8x8 tile to the result matrix.
 
#define STORE_ALL_ROWS()
 Store all rows of an 8x8 tile to the result matrix. More...
 

Functions

float horizontalAdd (__m256 value)
 Perform a horizontal add of a __m256 value. More...
 
void sumTensorOnHost (const Tensor &tensor, Tensor &destination)
 Sum all values of a tensor. More...
 
void fillTensorOnHost (Tensor &tensor, float value)
 Fill a tensor with a constant value. More...
 
void fillTensorOnHost (Tensor &tensor, const Tensor &value)
 
void addTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
 Element-wise add two tensors. More...
 
void subtractTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
 Subtract one tensor from another. More...
 
void hadamardTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
 Perform hadamard product (element-wise multiplication) between two tensors. More...
 
void divideTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
 Divide one tensor by another. More...
 
void logTensorOnHost (const Tensor &a, Tensor &destination)
 Apply natural logarithm to each element of the tensor. More...
 
void addBroadcastOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination)
 Perform the broadcast-add operation. More...
 
void multiplyTensorOnHost (const Tensor &tensor, float constant, Tensor &destination)
 Multiply a tensor with a constant. More...
 
void multiplyMatrixVectorOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination)
 Multiply a matrix with a vector. More...
 
void naiveMatMul (const Tensor &m1, const Tensor &m2, Tensor &destination, size_t rowStart=0, size_t columnStart=0)
 Naive matrix-matrix multiplication. More...
 
void multiplyMatrixMatrixOnHost (const Tensor &m1, const Tensor &m2, Tensor &destination)
 Multiply a matrix with a matrix. More...
 
void transposeMatrixOnHost (const Tensor &matrix, Tensor &destination)
 Transpose a matrix. More...
 
void reluTensorOnHost (const Tensor &tensor, Tensor &destination)
 
void reluDerivativeTensorOnHost (const Tensor &tensor, Tensor &destination)
 
void sigmoidTensorOnHost (const Tensor &tensor, Tensor &destination)
 

Detailed Description

Source file defining tensor operations that happen on host.

Author
Jan Warchocki
Date
29 August 2022

Macro Definition Documentation

◆ COMPUTE_ALL_ROWS

#define COMPUTE_ALL_ROWS ( )
Value:
COMPUTE_ROW(0); \
COMPUTE_ROW(1); \
COMPUTE_ROW(2); \
COMPUTE_ROW(3); \
COMPUTE_ROW(4); \
COMPUTE_ROW(5); \
COMPUTE_ROW(6); \
COMPUTE_ROW(7)
#define COMPUTE_ROW(index)
Compute a single row of an 8x8 tile.
Definition: tensor_operations_on_host.cpp:307

Compute all rows of an 8x8 tile.

◆ COMPUTE_ROW

#define COMPUTE_ROW (   index)
Value:
const __m256 m1ColumnValue##index = _mm256_broadcast_ss(m1.data + (row * 8 + (index)) * k + i); \
const __m256 mulResult##index = _mm256_mul_ps(m2Row, m1ColumnValue##index); \
v##index = _mm256_add_ps(mulResult##index, v##index)

Compute a single row of an 8x8 tile.

◆ SET_ALL_ROWS_TO_ZERO

#define SET_ALL_ROWS_TO_ZERO ( )
Value:
SET_ROW_TO_ZERO(0); \
SET_ROW_TO_ZERO(1); \
SET_ROW_TO_ZERO(2); \
SET_ROW_TO_ZERO(3); \
SET_ROW_TO_ZERO(4); \
SET_ROW_TO_ZERO(5); \
SET_ROW_TO_ZERO(6); \
SET_ROW_TO_ZERO(7)
#define SET_ROW_TO_ZERO(index)
Set a row of an 8x8 tile to 0.
Definition: tensor_operations_on_host.cpp:289

Set all rows of an 8x8 tile to 0.

◆ STORE_ALL_ROWS

#define STORE_ALL_ROWS ( )
Value:
STORE_ROW(0); \
STORE_ROW(1); \
STORE_ROW(2); \
STORE_ROW(3); \
STORE_ROW(4); \
STORE_ROW(5); \
STORE_ROW(6); \
STORE_ROW(7)
#define STORE_ROW(index)
Store a row of 8x8 tile to the result matrix.
Definition: tensor_operations_on_host.cpp:328

Store all rows of an 8x8 tile to the result matrix.

Function Documentation

◆ addBroadcastOnHost()

void addBroadcastOnHost ( const Tensor &matrix,
const Tensor &vector,
Tensor &destination
)

Perform the broadcast-add operation.

Parameters
matrix: The matrix tensor.
vector: The vector tensor.
destination: Where the result of the addition should be stored.

◆ addTensorsOnHost()

void addTensorsOnHost ( const Tensor &a,
const Tensor &b,
Tensor &destination
)

Element-wise add two tensors.

Parameters
a: The first tensor.
b: The second tensor.
destination: Where the result of the addition should be stored.

◆ divideTensorsOnHost()

void divideTensorsOnHost ( const Tensor &a,
const Tensor &b,
Tensor &destination
)

Divide one tensor by another.

Parameters
a: The tensor to divide.
b: The tensor to divide by.
destination: Where the result of the operation should be stored.

◆ fillTensorOnHost()

void fillTensorOnHost ( Tensor &tensor,
float value
)

Fill a tensor with a constant value.

Parameters
tensor: The tensor to fill.
value: The value to fill the tensor with.

◆ hadamardTensorsOnHost()

void hadamardTensorsOnHost ( const Tensor &a,
const Tensor &b,
Tensor &destination
)

Perform hadamard product (element-wise multiplication) between two tensors.

Parameters
a: The first tensor.
b: The second tensor.
destination: Where the result of the operation should be stored.

◆ horizontalAdd()

float horizontalAdd ( __m256  value)

Perform a horizontal add of a __m256 value.

This adds together all 8 floats packed in the given value.

Parameters
value: The __m256 variable whose floats should be summed up.
Returns
A float value corresponding to the sum.

◆ logTensorOnHost()

void logTensorOnHost ( const Tensor &a,
Tensor &destination
)

Apply natural logarithm to each element of the tensor.

Parameters
a: The tensor to apply natural logarithm to.
destination: Where the result of the operation should be stored.

◆ multiplyMatrixMatrixOnHost()

void multiplyMatrixMatrixOnHost ( const Tensor &m1,
const Tensor &m2,
Tensor &destination
)

Multiply a matrix with a matrix.

Parameters
m1: The first matrix tensor.
m2: The second matrix tensor.
destination: Where the result of the multiplication should be stored.

◆ multiplyMatrixVectorOnHost()

void multiplyMatrixVectorOnHost ( const Tensor &matrix,
const Tensor &vector,
Tensor &destination
)

Multiply a matrix with a vector.

Parameters
matrix: The matrix tensor.
vector: The vector tensor.
destination: Where the result of the multiplication should be stored.

◆ multiplyTensorOnHost()

void multiplyTensorOnHost ( const Tensor &tensor,
float constant,
Tensor &destination
)

Multiply a tensor with a constant.

Parameters
tensor: The tensor to multiply.
constant: The constant to multiply with.
destination: Where the result of the multiplication should be stored.

◆ naiveMatMul()

void naiveMatMul ( const Tensor &m1,
const Tensor &m2,
Tensor &destination,
size_t rowStart = 0,
size_t columnStart = 0
)

Naive matrix-matrix multiplication.

The method takes additional rowStart and columnStart arguments. These are used for edge cases in the SIMD implementation of matrix-matrix multiplication. They respectively signify the row and the column at which to start the computation.

Parameters
m1: The first operand matrix of the multiplication.
m2: The second operand matrix of the multiplication.
destination: Where the result of the multiplication should be stored.
rowStart: The row at which the computation should start (defaults to 0).
columnStart: The column at which the computation should start (defaults to 0).

◆ subtractTensorsOnHost()

void subtractTensorsOnHost ( const Tensor &a,
const Tensor &b,
Tensor &destination
)

Subtract one tensor from another.

Parameters
a: The tensor to subtract from.
b: The tensor to be subtracted.
destination: Where the result of the subtraction should be stored.

◆ sumTensorOnHost()

void sumTensorOnHost ( const Tensor &tensor,
Tensor &destination
)

Sum all values of a tensor.

Parameters
tensor: The tensor to sum.
destination: Where the sum of all values of the tensor should be stored (the function itself returns void).

◆ transposeMatrixOnHost()

void transposeMatrixOnHost ( const Tensor &matrix,
Tensor &destination
)

Transpose a matrix.

Parameters
matrix: The matrix to transpose.
destination: Where the result of the transpose operation should be stored.