@conference{279,
  keywords  = {Non-linear activation function, Generalization, Activation distribution, Sparsity},
  author    = {Arnold Pretorius and Etienne Barnard and Marelie Davel},
  title     = {{ReLU} and sigmoidal activation functions},
  abstract  = {The generalization capabilities of deep neural networks are not well understood, and in particular, the influence of activation functions on generalization has received little theoretical attention. Phenomena such as vanishing gradients, node saturation and network sparsity have been identified as possible factors when comparing different activation functions [1]. We investigate these factors using fully connected feedforward networks on two standard benchmark problems, and find that the most salient differences between networks with sigmoidal and ReLU activations relate to the way that class-distinctive information is propagated through a network.},
  year      = {2019},
  booktitle = {South African Forum for Artificial Intelligence Research (FAIR)},
  pages     = {37-48},
  month     = {04/12-07/12},
  publisher = {CEUR Workshop Proceedings},
  address   = {Cape Town, South Africa},
}