nnlib
GPU-accelerated, C/C++ neural network library.
Titanic

Training a neural network on the Titanic dataset.

The dataset can be downloaded from https://www.kaggle.com/c/titanic. Since nnlib does not support feature engineering, the dataset needs to be prepared using the following Python script:

"""Prepare the Kaggle Titanic training file for nnlib.

Reads the CSV given as the first command-line argument, keeps the numeric
features (Age, SibSp, Parch, Fare) plus a one-hot encoded Sex column,
min-max scales the numeric features and writes the result, with the
target as the last column, to ./out/dataset.csv (no header, no index).
"""
import os
import sys

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


def scale(X, column):
    """Min-max scale a single column of the 2-D array ``X`` in place.

    Uses ``MinMaxScaler`` with its default settings.

    :param X: 2-D NumPy array holding the features.
    :param column: index of the column to rescale.
    """
    scaler = MinMaxScaler()
    # The scaler expects a 2-D input, hence the reshape to a single column.
    transformed = scaler.fit_transform(X[:, column].reshape(-1, 1))
    X[:, column] = transformed[:, 0]


def main(argv):
    """Convert the Titanic train file into the CSV layout read by main.cpp.

    :param argv: command-line arguments; ``argv[1]`` is the path to the
        Titanic train file.
    :return: process exit status, 0 on success and 1 when the path is missing.
    """
    if len(argv) < 2:
        print('Filepath to the train file needs to be specified')
        return 1

    filepath = argv[1]
    print(f'Processing {filepath}')

    dataset = pd.read_csv(filepath)
    # Assign the result back instead of calling fillna(inplace=True) on a
    # column selection: the chained in-place form is deprecated in pandas
    # and does not modify the frame once Copy-on-Write is enabled.
    dataset['Age'] = dataset['Age'].fillna(dataset['Age'].mean())
    # Request a float dummy column explicitly. pandas >= 2.0 produces bool
    # dummies by default, which would turn the feature matrix into an object
    # array and write 'True'/'False' into the output CSV.
    dataset = pd.get_dummies(dataset, prefix=['Sex'], columns=['Sex'], drop_first=True, dtype=float)
    dataset = dataset.drop(['PassengerId', 'Pclass', 'Name', 'Ticket', 'Cabin', 'Embarked'], axis=1)
    print(dataset.head())

    # After the drop, column 0 is the target; the remaining columns are the features.
    X = dataset.iloc[:, 1:].to_numpy(dtype=float)
    y = dataset.iloc[:, 0].to_numpy()
    print(X)

    scale(X, 0)  # Scale age
    scale(X, 1)  # Scale SibSp
    scale(X, 2)  # Scale Parch
    scale(X, 3)  # Scale Fare
    print(X)

    dfX = pd.DataFrame(X)
    dfX['5'] = y  # Add the targets as the last column

    # to_csv does not create missing directories, so make sure ./out exists.
    os.makedirs('./out', exist_ok=True)
    dfX.to_csv('./out/dataset.csv', header=None, index=False)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

The Python script expects one argument: the path to the Titanic training file downloaded from Kaggle. The dataset prepared for nnlib is then written to ./out/dataset.csv, relative to the current working directory.

The main.cpp file expects one argument, which is the absolute path to the file prepared by the Python script.

#include <iostream>
#include <nnlib/network.h>
#include <nnlib/read.h>
#include <nnlib/verify.cuh>
#include <nnlib/onehot_encode.h>
#include <chrono>
/**
 * Trains a small network on the prepared Titanic dataset.
 *
 * The CSV file named by argv[1] is loaded, its last column is split off as
 * the target and the remaining columns are used as features. A network with
 * one 64-neuron layer and a sigmoid output layer is then trained on them.
 *
 * @param argc number of command-line arguments.
 * @param argv argv[1] is the path to the dataset file.
 * @return 1 when no dataset path was given, 0 otherwise.
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cout << "Dataset file path was not specified." << std::endl;
        return 1;
    }

    // Read the comma-separated file; the last argument is readCSV's numThreads.
    sTensor dataset = readCSV(argv[1], ",", 4);

    // Features take every column except the last; the target is a single column.
    sTensor X = std::make_shared<Tensor>(dataset->shape[0], dataset->shape[1] - 1);
    sTensor y = std::make_shared<Tensor>(dataset->shape[0], 1);

    const auto rowCount = dataset->shape[0];
    const auto columnCount = dataset->shape[1];

    for (int row = 0; row < rowCount; row++) {
        // Offset of the first value of this row in the flat dataset buffer.
        const auto rowStart = row * columnCount;

        // The last value of each row is the target.
        y->data[row] = dataset->data[rowStart + columnCount - 1];

        for (int col = 0; col < columnCount - 1; col++) {
            X->data[row * X->shape[1] + col] = dataset->data[rowStart + col];
        }
    }

    // NOTE(review): y is an sTensor handle; what gets printed depends on the
    // operator<< nnlib provides for it - confirm this shows the tensor contents.
    std::cout << y << std::endl;

    Network network = Network(X->shape[1]);
    network.add(64);
    network.add(y->shape[1], "sigmoid");

    // NOTE(review): the metric and loss objects are allocated with new and never
    // deleted here - presumably fine for an example, but confirm who owns them.
    std::vector<Metric*> metrics = {new BinaryAccuracy(), new MeanSquaredError()};

    const int epochs = 100;
    const int batchSize = 10;
    network.train(X, y, epochs, batchSize, 0.01, new BinaryCrossEntropy(), metrics);

    return 0;
}
The implementation of binary accuracy.
Definition: metric.h:97
Class representing the Binary Cross Entropy.
Definition: loss.h:60
Class representing the Mean Squared Error.
Definition: loss.h:48
Represents a neural network.
Definition: network.h:23
void train(sTensor &X, sTensor &y, int epochs, size_t batchSize, float learningRate, Loss *loss, std::vector< Metric * > &metrics)
Train the network.
Definition: network.cpp:198
void add(size_t numNeurons, const std::string &activation="linear")
Add a new layer to the network.
Definition: network.cpp:178
sTensor readCSV(const std::string &filepath, const std::string &delim, int numThreads)
Read a csv file from a path.
Definition: read.cpp:107
void showCudaInfo()
Show information about CUDA and the available GPU(s).
Definition: verify.cu:17

The project can be built with the following CMake script. This script requires CMAKE_PREFIX_PATH to be set to the install directory of nnlib.

# Build script for the Titanic example executable, linked against an installed nnlib.
cmake_minimum_required(VERSION 3.20)
include(CheckLanguage)

project(titanic_nnlib LANGUAGES CXX)
set(CMAKE_CXX_STANDARD 14)

# Locate the installed nnlib package configuration.
find_package(nnlib CONFIG REQUIRED)

# CUDA is optional: it is only enabled when a CUDA compiler is found.
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

    # Add flag to specify the architecture of the GPU (compute capability)
    # NOTE(review): this tests a plain CUDA_ARCHITECTURES variable and sets the
    # property on the nnlib target rather than on ${PROJECT_NAME} - confirm
    # that this is intended.
    if(NOT DEFINED CUDA_ARCHITECTURES)
        set_target_properties(nnlib PROPERTIES CUDA_ARCHITECTURES "50")
    endif()

    set_target_properties(nnlib PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
endif()

add_executable(${PROJECT_NAME} main.cpp)
target_link_libraries(${PROJECT_NAME} PRIVATE nnlib)
Definition: functions.h:68