Source file defining tensor operations that happen on device. More...

#include "tensor_operations_on_device.cuh"
#include <cmath>
#include <exceptions/unexpected_cuda_call_exception.h>
#include <gpu/assert.cuh>

Macros
#define	TILE_WIDTH 16
	Size of a tile when performing tiled matrix multiplication.

Functions
__global__ void	sumTensorKernel (const float *a, float *destination, size_t size, size_t n)

__global__ void	fillTensorKernel (float *tensor, float value, size_t size)
	Kernel method to fill a tensor with a value. More...

__global__ void	fillTensorKernel (float *tensor, const float *value, size_t size)

__global__ void	addTensorsKernel (const float *a, const float *b, float *destination, size_t size)
	Kernel method to add two tensors together. More...

__global__ void	addBroadcastKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
	Kernel method to perform broadcast-add operation. More...

__global__ void	subtractTensorsKernel (const float *a, const float *b, float *destination, size_t size)
	Kernel method to perform tensor subtraction. More...

__global__ void	mulMatrixVectorKernel (const float *matrix, const float *vector, float *destination, size_t n, size_t m)
	Kernel method to perform matrix-vector multiplication. More...

__global__ void	multiplyMatricesTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
	Kernel method to multiply two matrices using a tiling method. More...

__global__ void	multiplyMatricesNoTilingKernel (const float *m1, const float *m2, float *destination, size_t n, size_t m, size_t k)
	Kernel method to perform naive matrix-matrix multiplication. More...

__global__ void	multiplyTensorKernel (const float *tensor, float constant, float *destination, size_t size)
	Kernel method to multiply a tensor with a constant. More...

__global__ void	hadamardTensorsKernel (const float *a, const float *b, float *destination, size_t size)
	Kernel method to apply hadamard product to two tensors. More...

__global__ void	divideTensorsKernel (const float *a, const float *b, float *destination, size_t size)
	Kernel method to element-wise divide one tensor by another. More...

__global__ void	logTensorKernel (const float *a, float *destination, size_t size)
	Kernel method to apply the natural logarithm to each element of the tensor. More...

__global__ void	transposeMatrixKernel (const float *matrix, float *destination, size_t n, size_t m)
	Kernel method to transpose a matrix. More...

__global__ void	reluKernel (const float *input, float *result, size_t size)

__global__ void	reluDerivativeKernel (const float *output, float *result, size_t size)

__global__ void	sigmoidKernel (float *input, float *result, size_t size)

void	sumTensorOnDevice (const Tensor &tensor, Tensor &destination)

void	fillTensorOnDevice (Tensor &tensor, float value)
	Fill a tensor with a constant value. More...

void	fillTensorOnDevice (Tensor &tensor, const Tensor &value)

void	addTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
	Element-wise add two tensors. More...

void	subtractTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
	Subtract one tensor from another. More...

void	hadamardTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
	Perform hadamard product (element-wise multiplication) between two tensors. More...

void	divideTensorsOnDevice (const Tensor &a, const Tensor &b, Tensor &destination)
	Divide one tensor by another. More...

void	logTensorOnDevice (const Tensor &a, Tensor &destination)
	Apply natural logarithm to each element of the tensor. More...

void	addBroadcastOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
	Perform the broadcast-add operation. More...

void	multiplyTensorOnDevice (const Tensor &tensor, float constant, Tensor &destination)
	Multiply a tensor with a constant. More...

void	multiplyMatrixVectorOnDevice (const Tensor &matrix, const Tensor &vector, Tensor &destination)
	Multiply a matrix with a vector. More...

void	multiplyMatrixMatrixOnDevice (const Tensor &m1, const Tensor &m2, Tensor &destination)
	Multiply a matrix with a matrix. More...

void	transposeMatrixOnDevice (const Tensor &matrix, Tensor &destination)
	Transpose a matrix. More...

void	reluTensorOnDevice (const Tensor &tensor, Tensor &destination)

void	reluDerivativeTensorOnDevice (const Tensor &tensor, Tensor &destination)

void	sigmoidTensorOnDevice (const Tensor &tensor, Tensor &destination)

Detailed Description

Source file defining tensor operations that happen on device.

Author: Jan Warchocki

Date: 29 August 2022

Function Documentation

◆ addBroadcastKernel()

__global__ void addBroadcastKernel	(	const float *	matrix,
		const float *	vector,
		float *	destination,
		size_t	n,
		size_t	m
	)

Kernel method to perform broadcast-add operation.

Parameters

matrix	The data of the matrix.
vector	The data of the vector to broadcast.
destination	Where the result of the operation should be stored.
n	The number of rows of the matrix.
m	The number of columns of the matrix. Same as the size of the vector.

◆ addBroadcastOnDevice()

void addBroadcastOnDevice	(	const Tensor &	matrix,
		const Tensor &	vector,
		Tensor &	destination
	)

Perform the broadcast-add operation.

Parameters

matrix	The matrix tensor.
vector	The vector tensor.
destination	Where the result of the addition should be stored.

◆ addTensorsKernel()

__global__ void addTensorsKernel	(	const float *	a,
		const float *	b,
		float *	destination,
		size_t	size
	)

Kernel method to add two tensors together.

Parameters

a	The data of the first tensor.
b	The data of the second tensor.
destination	Where the result of the operation should be stored.
size	The size of the tensors.

◆ addTensorsOnDevice()

void addTensorsOnDevice	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Element-wise add two tensors.

Parameters

a	The first tensor.
b	The second tensor.
destination	Where the result of the addition should be stored.

◆ divideTensorsKernel()

__global__ void divideTensorsKernel	(	const float *	a,
		const float *	b,
		float *	destination,
		size_t	size
	)

Kernel method to element-wise divide one tensor by another.

Parameters

a	The data of the first tensor.
b	The data of the second tensor.
destination	Where the result of the division should be stored.
size	The size of the tensors.

◆ divideTensorsOnDevice()

void divideTensorsOnDevice	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Divide one tensor by another.

Parameters

a	The tensor to divide.
b	The tensor to divide by.
destination	Where the result of the operation should be stored.

◆ fillTensorKernel()

__global__ void fillTensorKernel	(	float *	tensor,
		float	value,
		size_t	size
	)

Kernel method to fill a tensor with a value.

Parameters

tensor	The tensor to fill.
value	The value to fill the tensor with.
size	The size of the tensor.

◆ fillTensorOnDevice()

void fillTensorOnDevice	(	Tensor &	tensor,
		float	value
	)

Fill a tensor with a constant value.

Parameters

tensor	The tensor to fill.
value	The value to fill the tensor with.

◆ hadamardTensorsKernel()

__global__ void hadamardTensorsKernel	(	const float *	a,
		const float *	b,
		float *	destination,
		size_t	size
	)

Kernel method to apply hadamard product to two tensors.

Parameters

a	The data of the first tensor.
b	The data of the second tensor.
destination	Where the result of the hadamard operation should be stored.
size	The size of the tensors.

◆ hadamardTensorsOnDevice()

void hadamardTensorsOnDevice	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Perform hadamard product (element-wise multiplication) between two tensors.

Parameters

a	The first tensor.
b	The second tensor.
destination	Where the result of the operation should be stored.

◆ logTensorKernel()

__global__ void logTensorKernel	(	const float *	a,
		float *	destination,
		size_t	size
	)

Kernel method to apply the natural logarithm to each element of the tensor.

Parameters

a	The data of the tensor to apply natural logarithm to.
destination	Where the result of the natural logarithm should be stored.
size	The size of the tensor.

◆ logTensorOnDevice()

void logTensorOnDevice	(	const Tensor &	a,
		Tensor &	destination
	)

Apply natural logarithm to each element of the tensor.

Parameters

a	The tensor to apply natural logarithm to.
destination	Where the result of the operation should be stored.

◆ mulMatrixVectorKernel()

__global__ void mulMatrixVectorKernel	(	const float *	matrix,
		const float *	vector,
		float *	destination,
		size_t	n,
		size_t	m
	)

Kernel method to perform matrix-vector multiplication.

Parameters

matrix	The data of the matrix to multiply.
vector	The data of the vector to multiply.
destination	Where the result of the operation should be stored.
n	The number of rows of the matrix.
m	The number of columns of the matrix. Same as the size of the vector.

◆ multiplyMatricesNoTilingKernel()

__global__ void multiplyMatricesNoTilingKernel	(	const float *	m1,
		const float *	m2,
		float *	destination,
		size_t	n,
		size_t	m,
		size_t	k
	)

Kernel method to perform naive matrix-matrix multiplication.

Parameters

m1	The data of the first matrix.
m2	The data of the second matrix.
destination	Where the result of the operation should be stored.
n	The number of rows of the first matrix.
m	The number of columns of the first matrix.
k	The number of columns of the second matrix.

◆ multiplyMatricesTilingKernel()

__global__ void multiplyMatricesTilingKernel	(	const float *	m1,
		const float *	m2,
		float *	destination,
		size_t	n,
		size_t	m,
		size_t	k
	)

Kernel method to multiply two matrices using a tiling method.

For performance reasons, this method is currently not used to perform matrix-matrix multiplication.

Parameters

m1	The data of the first matrix.
m2	The data of the second matrix.
destination	Where the result of the operation should be stored.
n	The number of rows of the first matrix.
m	The number of columns of the first matrix.
k	The number of columns of the second matrix.

◆ multiplyMatrixMatrixOnDevice()

void multiplyMatrixMatrixOnDevice	(	const Tensor &	m1,
		const Tensor &	m2,
		Tensor &	destination
	)

Multiply a matrix with a matrix.

Parameters

m1	The first matrix tensor.
m2	The second matrix tensor.
destination	Where the result of the multiplication should be stored.

◆ multiplyMatrixVectorOnDevice()

void multiplyMatrixVectorOnDevice	(	const Tensor &	matrix,
		const Tensor &	vector,
		Tensor &	destination
	)

Multiply a matrix with a vector.

Parameters

matrix	The matrix tensor.
vector	The vector tensor.
destination	Where the result of the multiplication should be stored.

◆ multiplyTensorKernel()

__global__ void multiplyTensorKernel	(	const float *	tensor,
		float	constant,
		float *	destination,
		size_t	size
	)

Kernel method to multiply a tensor with a constant.

Parameters

tensor	The data of the tensor to multiply.
constant	The constant to multiply `tensor` with.
destination	Where the result of the operation should be stored.
size	The size of the tensor.

◆ multiplyTensorOnDevice()

void multiplyTensorOnDevice	(	const Tensor &	tensor,
		float	constant,
		Tensor &	destination
	)

Multiply a tensor with a constant.

Parameters

tensor	The tensor to multiply.
constant	The constant to multiply with.
destination	Where the result of the multiplication should be stored.

◆ subtractTensorsKernel()

__global__ void subtractTensorsKernel	(	const float *	a,
		const float *	b,
		float *	destination,
		size_t	size
	)

Kernel method to perform tensor subtraction.

Parameters

a	The data of the tensor to subtract from.
b	The data of the tensor to be subtracted.
destination	Where the result of the operation should be stored.
size	The size of the tensors.

◆ subtractTensorsOnDevice()

void subtractTensorsOnDevice	(	const Tensor &	a,
		const Tensor &	b,
		Tensor &	destination
	)

Subtract one tensor from another.

Parameters

a	The tensor to subtract from.
b	The tensor to be subtracted.
destination	Where the result of the subtraction should be stored.

◆ transposeMatrixKernel()

__global__ void transposeMatrixKernel	(	const float *	matrix,
		float *	destination,
		size_t	n,
		size_t	m
	)

Kernel method to transpose a matrix.

Parameters

matrix	The data of the matrix to transpose.
destination	Where the result of the transpose operation should be stored.
n	The number of rows of `matrix`.
m	The number of columns of `matrix`.

◆ transposeMatrixOnDevice()

void transposeMatrixOnDevice	(	const Tensor &	matrix,
		Tensor &	destination
	)

Transpose a matrix.

Parameters

matrix	The matrix tensor to transpose.
destination	Where the result of the transpose operation should be stored.

Macros

Functions

Detailed Description

Function Documentation

◆ addBroadcastKernel()

◆ addBroadcastOnDevice()

◆ addTensorsKernel()

◆ addTensorsOnDevice()

◆ divideTensorsKernel()

◆ divideTensorsOnDevice()

◆ fillTensorKernel()

◆ fillTensorOnDevice()

◆ hadamardTensorsKernel()

◆ hadamardTensorsOnDevice()

◆ logTensorKernel()

◆ logTensorOnDevice()

◆ mulMatrixVectorKernel()

◆ multiplyMatricesNoTilingKernel()

◆ multiplyMatricesTilingKernel()

◆ multiplyMatrixMatrixOnDevice()

◆ multiplyMatrixVectorOnDevice()

◆ multiplyTensorKernel()

◆ multiplyTensorOnDevice()

◆ subtractTensorsKernel()

◆ subtractTensorsOnDevice()

◆ transposeMatrixKernel()

◆ transposeMatrixOnDevice()