Source file defining tensor operations that happen on host. More...

#include "tensor_operations_on_host.h"
#include <cmath>
#include <immintrin.h>

Macros
#define	SET_ROW_TO_ZERO(index) __m256 v##index = _mm256_setzero_ps()
	Set a row of an 8x8 tile to 0.

#define	SET_ALL_ROWS_TO_ZERO()
	Set all rows of an 8x8 tile to 0. More...

#define	COMPUTE_ROW(index)
	Compute a single row of an 8x8 tile. More...

#define	COMPUTE_ALL_ROWS()
	Compute all rows of an 8x8 tile. More...

#define	STORE_ROW(index) _mm256_storeu_ps(destination.data + (row * 8 + (index)) * m + column * 8, v##index)
	Store a row of an 8x8 tile to the result matrix.

#define	STORE_ALL_ROWS()
	Store all rows of an 8x8 tile to the result matrix. More...

Functions
float	horizontalAdd (__m256 value)
	Perform a horizontal add of a `__m256` value. More...

void	sumTensorOnHost (const Tensor &tensor, Tensor &destination)
	Sum all values of a tensor. More...

void	fillTensorOnHost (Tensor &tensor, float value)
	Fill a tensor with a constant value. More...

void	fillTensorOnHost (Tensor &tensor, const Tensor &value)

void	addTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
	Element-wise add two tensors. More...

void	subtractTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
	Subtract one tensor from another. More...

void	hadamardTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
	Perform hadamard product (element-wise multiplication) between two tensors. More...

void	divideTensorsOnHost (const Tensor &a, const Tensor &b, Tensor &destination)
	Divide one tensor by another. More...

void	logTensorOnHost (const Tensor &a, Tensor &destination)
	Apply natural logarithm to each element of the tensor. More...

void	addBroadcastOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination)
	Perform the broadcast-add operation. More...

void	multiplyTensorOnHost (const Tensor &tensor, float constant, Tensor &destination)
	Multiply a tensor with a constant. More...

void	multiplyMatrixVectorOnHost (const Tensor &matrix, const Tensor &vector, Tensor &destination)
	Multiply a matrix with a vector. More...

void	naiveMatMul (const Tensor &m1, const Tensor &m2, Tensor &destination, size_t rowStart=0, size_t columnStart=0)
	Naive matrix-matrix multiplication. More...

void	multiplyMatrixMatrixOnHost (const Tensor &m1, const Tensor &m2, Tensor &destination)
	Multiply a matrix with a matrix. More...

void	transposeMatrixOnHost (const Tensor &matrix, Tensor &destination)
	Transpose a matrix. More...

void	reluTensorOnHost (const Tensor &tensor, Tensor &destination)

void	reluDerivativeTensorOnHost (const Tensor &tensor, Tensor &destination)

void	sigmoidTensorOnHost (const Tensor &tensor, Tensor &destination)

Detailed Description

Source file defining tensor operations that happen on host.

Author: Jan Warchocki

Date: 29 August 2022

Macro Definition Documentation

◆ COMPUTE_ALL_ROWS

#define COMPUTE_ALL_ROWS ( )

Value:

    COMPUTE_ROW(0); \
    COMPUTE_ROW(1); \
    COMPUTE_ROW(2); \
    COMPUTE_ROW(3); \
    COMPUTE_ROW(4); \
    COMPUTE_ROW(5); \
    COMPUTE_ROW(6); \
    COMPUTE_ROW(7)

Compute all rows of an 8x8 tile.

◆ COMPUTE_ROW

#define COMPUTE_ROW ( index )

Value:

    const __m256 m1ColumnValue##index = _mm256_broadcast_ss(m1.data + (row * 8 + (index)) * k + i); \
    const __m256 mulResult##index = _mm256_mul_ps(m2Row, m1ColumnValue##index); \
    v##index = _mm256_add_ps(mulResult##index, v##index)

Compute a single row of an 8x8 tile.

◆ SET_ALL_ROWS_TO_ZERO

#define SET_ALL_ROWS_TO_ZERO ( )

Value:

    SET_ROW_TO_ZERO(0); \
    SET_ROW_TO_ZERO(1); \
    SET_ROW_TO_ZERO(2); \
    SET_ROW_TO_ZERO(3); \
    SET_ROW_TO_ZERO(4); \
    SET_ROW_TO_ZERO(5); \
    SET_ROW_TO_ZERO(6); \
    SET_ROW_TO_ZERO(7)

Set all rows of an 8x8 tile to 0.

◆ STORE_ALL_ROWS

#define STORE_ALL_ROWS ( )

Value:

    STORE_ROW(0); \
    STORE_ROW(1); \
    STORE_ROW(2); \
    STORE_ROW(3); \
    STORE_ROW(4); \
    STORE_ROW(5); \
    STORE_ROW(6); \
    STORE_ROW(7)

Store all rows of an 8x8 tile to the result matrix.

Function Documentation

◆ addBroadcastOnHost()

void addBroadcastOnHost	(	const Tensor &	matrix,
		const Tensor &	vector,
		Tensor &	destination
	)

Perform the broadcast-add operation.

Parameters

matrix	The matrix tensor.
vector	The vector tensor.
destination	Where the result of the addition should be stored.

◆ addTensorsOnHost()

void addTensorsOnHost	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Element-wise add two tensors.

Parameters

a	The first tensor.
b	The second tensor.
destination	Where the result of the addition should be stored.

◆ divideTensorsOnHost()

void divideTensorsOnHost	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Divide one tensor by another.

Parameters

a	The tensor to divide.
b	The tensor to divide by.
destination	Where the result of the operation should be stored.

◆ fillTensorOnHost()

void fillTensorOnHost	(	Tensor &	tensor,
		float	value
	)

Fill a tensor with a constant value.

Parameters

tensor	The tensor to fill.
value	The value to fill the tensor with.

◆ hadamardTensorsOnHost()

void hadamardTensorsOnHost	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Perform hadamard product (element-wise multiplication) between two tensors.

Parameters

a	The first tensor.
b	The second tensor.
destination	Where the result of the operation should be stored.

◆ horizontalAdd()

float horizontalAdd ( __m256 value )

Perform a horizontal add of a __m256 value.

This sums all 8 floats contained in the value.

Parameters

value The __m256 variable whose floats should be summed up.

Returns: A float value corresponding to the sum.

◆ logTensorOnHost()

void logTensorOnHost	(	const Tensor &	a,
		Tensor &	destination
	)

Apply natural logarithm to each element of the tensor.

Parameters

a	The tensor to apply natural logarithm to.
destination	Where the result of the operation should be stored.

◆ multiplyMatrixMatrixOnHost()

void multiplyMatrixMatrixOnHost	(	const Tensor &	m1,
		const Tensor &	m2,
		Tensor &	destination
	)

Multiply a matrix with a matrix.

Parameters

m1	The first matrix tensor.
m2	The second matrix tensor.
destination	Where the result of the multiplication should be stored.

◆ multiplyMatrixVectorOnHost()

void multiplyMatrixVectorOnHost	(	const Tensor &	matrix,
		const Tensor &	vector,
		Tensor &	destination
	)

Multiply a matrix with a vector.

Parameters

matrix	The matrix tensor.
vector	The vector tensor.
destination	Where the result of the multiplication should be stored.

◆ multiplyTensorOnHost()

void multiplyTensorOnHost	(	const Tensor &	tensor,
		float	constant,
		Tensor &	destination
	)

Multiply a tensor with a constant.

Parameters

tensor	The tensor to multiply.
constant	The constant to multiply with.
destination	Where the result of the multiplication should be stored.

◆ naiveMatMul()

void naiveMatMul	(	const Tensor &	m1,
		const Tensor &	m2,
		Tensor &	destination,
		size_t	rowStart = `0`,
		size_t	columnStart = `0`
	)

Naive matrix-matrix multiplication.

The method takes additional rowStart and columnStart arguments. These are used for edge cases in the SIMD implementation of matrix-matrix multiplication. They respectively signify the row and the column at which the computation should start.

Parameters

m1	The first operand matrix of the multiplication.
m2	The second operand matrix of the multiplication.
destination	Where the result of the multiplication should be stored.
rowStart	The index of the row at which the computation should start.
columnStart	The index of the column at which the computation should start.

◆ subtractTensorsOnHost()

void subtractTensorsOnHost	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Subtract one tensor from another.

Parameters

a	The tensor to subtract from.
b	The tensor to be subtracted.
destination	Where the result of the subtraction should be stored.

◆ sumTensorOnHost()

void sumTensorOnHost	(	const Tensor &	tensor,
		Tensor &	destination
	)

Sum all values of a tensor.

Parameters

tensor	The tensor to sum.
destination	Where the sum of all values of the tensor should be stored.

◆ transposeMatrixOnHost()

void transposeMatrixOnHost	(	const Tensor &	matrix,
		Tensor &	destination
	)

Transpose a matrix.

Parameters

matrix	The matrix tensor to transpose.
destination	Where the result of the transpose operation should be stored.

Macros

Functions

Detailed Description

Macro Definition Documentation

◆ COMPUTE_ALL_ROWS

◆ COMPUTE_ROW

◆ SET_ALL_ROWS_TO_ZERO

◆ STORE_ALL_ROWS

Function Documentation

◆ addBroadcastOnHost()

◆ addTensorsOnHost()

◆ divideTensorsOnHost()

◆ fillTensorOnHost()

◆ hadamardTensorsOnHost()

◆ horizontalAdd()

◆ logTensorOnHost()

◆ multiplyMatrixMatrixOnHost()

◆ multiplyMatrixVectorOnHost()

◆ multiplyTensorOnHost()

◆ naiveMatMul()

◆ subtractTensorsOnHost()

◆ sumTensorOnHost()

◆ transposeMatrixOnHost()