Research Publications
2020
When training neural networks as classifiers, it is common to observe an increase in average test loss while still maintaining or improving the overall classification accuracy on the same dataset. In spite of the ubiquity of this phenomenon, it has not been well studied and is often dismissively attributed to an increase in borderline correct classifications. We present an empirical investigation that shows how this phenomenon is actually a result of the differential manner by which test samples are processed. In essence: test loss does not increase overall, but only for a small minority of samples. Large representational capacities allow losses to decrease for the vast majority of test samples at the cost of extreme increases for others. This effect seems to be mainly caused by increased parameter values relating to the correctly processed sample features. Our findings contribute to the practical understanding of a common behaviour of deep neural networks. We also discuss the implications of this work for network optimisation and generalisation.
@article{484, author = {Arthur Venter and Marthinus Theunissen and Marelie Davel}, title = {Pre-interpolation loss behaviour in neural networks}, year = {2020}, journal = {Communications in Computer and Information Science}, volume = {1342}, pages = {296-309}, publisher = {Springer}, isbn = {978-3-030-66151-9}, doi = {10.1007/978-3-030-66151-9_19}, }
Although Convolutional Neural Networks (CNNs) are widely used, their translation invariance (ability to deal with translated inputs) is still subject to some controversy. We explore this question using translation-sensitivity maps to quantify how sensitive a standard CNN is to a translated input. We propose the use of cosine similarity as a sensitivity metric over Euclidean distance, and discuss the importance of restricting the dimensionality of either of these metrics when comparing architectures. Our main focus is to investigate the effect of different architectural components of a standard CNN on that network’s sensitivity to translation. By varying convolutional kernel sizes and amounts of zero padding, we control the size of the feature maps produced, allowing us to quantify the extent to which these elements influence translation invariance. We also measure translation invariance at different locations within the CNN to determine the extent to which convolutional and fully connected layers, respectively, contribute to the translation invariance of a CNN as a whole. Our analysis indicates that both convolutional kernel size and feature map size have a systematic influence on translation invariance. We also see that convolutional layers contribute less than expected to translation invariance, when not specifically forced to do so.
@article{485, author = {Johannes Myburgh and Coenraad Mouton and Marelie Davel}, title = {Tracking translation invariance in CNNs}, year = {2020}, journal = {Communications in Computer and Information Science}, volume = {1342}, pages = {282-295}, publisher = {Springer}, isbn = {978-3-030-66151-9}, doi = {10.1007/978-3-030-66151-9_18}, }
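For readers wanting a concrete picture of the measurement described above: a translation-sensitivity map compares a network's representation of an image with its representations of shifted copies of the same image. The sketch below is a minimal illustration, assuming a user-supplied features function (e.g. a CNN layer's flattened activations); the names, the shift range, and the use of wrap-around shifting are our own simplifying assumptions, not the paper's code.

import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity between two flat feature vectors.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-12))

def translation_sensitivity_map(img, features, max_shift=8):
    """Compare features(img) against features of shifted copies of img.

    img:      2-D numpy array (a single-channel image).
    features: hypothetical callable returning a flat feature vector.
    """
    ref = features(img)
    shifts = list(range(-max_shift, max_shift + 1))
    sens = np.zeros((len(shifts), len(shifts)))
    for i, dy in enumerate(shifts):
        for j, dx in enumerate(shifts):
            # np.roll wraps pixels around the border -- a simplification.
            shifted = np.roll(np.roll(img, dy, axis=0), dx, axis=1)
            sens[i, j] = cosine_similarity(ref, features(shifted))
    return sens  # values near 1.0 indicate invariance to that (dy, dx) shift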
Convolutional Neural Networks have become the standard for image classification tasks; however, these architectures are not invariant to translations of the input image. This lack of invariance is attributed to the use of stride, which subsamples the input, resulting in a loss of information, and to fully connected layers, which lack spatial reasoning. We show that stride can greatly benefit translation invariance, provided that it is combined with sufficient similarity between neighbouring pixels, a characteristic which we refer to as local homogeneity. We also observe that this characteristic is dataset-specific and dictates the relationship between pooling kernel size and stride required for translation invariance. Furthermore, we find that a trade-off exists between generalization and translation invariance in the case of pooling kernel size, as larger kernel sizes lead to better invariance but poorer generalization. Finally, we explore the efficacy of other proposed solutions, namely global average pooling, anti-aliasing, and data augmentation, both empirically and through the lens of local homogeneity.
@article{486, author = {Coenraad Mouton and Johannes Myburgh and Marelie Davel}, title = {Stride and translation invariance in CNNs}, year = {2020}, journal = {Communications in Computer and Information Science}, volume = {1342}, pages = {267-281}, publisher = {Springer}, isbn = {978-3-030-66151-9}, doi = {10.1007/978-3-030-66151-9_17}, }
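As a rough illustration of the "local homogeneity" notion used above (similarity between neighbouring pixels), one plausible image-level measure compares adjacent-pixel differences to the image's dynamic range. This is our own hedged interpretation for illustration; the paper's exact definition may differ.

import numpy as np

def local_homogeneity(img):
    """Score in [0, 1]: 1.0 means perfectly homogeneous neighbouring pixels.
    An illustrative measure, not the paper's definition."""
    img = img.astype(float)
    dx = np.abs(np.diff(img, axis=1)).mean()  # mean horizontal neighbour difference
    dy = np.abs(np.diff(img, axis=0)).mean()  # mean vertical neighbour difference
    spread = img.max() - img.min() + 1e-12    # dynamic range, guards division by zero
    return 1.0 - (dx + dy) / (2 * spread)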
We investigate whether word embeddings using deep neural networks can assist in the analysis of text produced by a speech-recognition system. In particular, we develop algorithms to identify which words are incorrectly detected by a speech-recognition system in broadcast news. The multilingual corpus used in this investigation contains speech from the eleven official South African languages, as well as Hindi. Popular word embedding algorithms such as Word2Vec and fastText are investigated and compared with context-specific embedding representations such as Doc2Vec, and with non-context-specific statistical sentence embedding methods such as term frequency-inverse document frequency (TFIDF), which is used as our baseline method. These various embedding methods are then used as fixed-length input representations for logistic regression and feedforward neural network classifiers. The output is used as an additional categorical input feature to a CatBoost classifier to determine whether the words were correctly recognised. Other methods are also investigated, including a method that uses the word embedding itself and cosine similarity between specific keywords to identify whether a specific keyword was correctly detected. When relying only on the speech-text data, the best result was obtained using the TFIDF document embeddings as input features to a feedforward neural network. Adding the output from the feedforward neural network as an additional feature to the CatBoost classifier did not enhance the classifier’s performance compared to using the non-textual information provided, although adding the output from a weaker classifier was somewhat beneficial.
@inproceedings{398, author = {Rhyno Strydom and Etienne Barnard}, title = {Classifying recognised speech with deep neural networks}, year = {2020}, booktitle = {Southern African Conference for Artificial Intelligence Research}, pages = {191-205}, month = {22/02/2021 - 26/02/2021}, publisher = {Southern African Conference for Artificial Intelligence Research}, address = {South Africa}, isbn = {978-0-620-89373-2}, }
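The TFIDF baseline described above can be sketched in a few lines with scikit-learn. The tiny in-line dataset and the binary labels are placeholders for the broadcast-news transcriptions and their correctness annotations; this is a sketch of the general setup, not the paper's code.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Placeholder ASR output segments and labels (1 = keyword correctly recognised).
texts = ["the president said on monday that the budget ...",
         "news bulletin at six o'clock tonight ..."]
labels = [1, 0]

# TFIDF document representation feeding a logistic regression classifier.
clf = make_pipeline(TfidfVectorizer(ngram_range=(1, 2)),
                    LogisticRegression(max_iter=1000))
clf.fit(texts, labels)
print(clf.predict(["breaking news from parliament"]))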
The understanding of generalisation in machine learning is in a state of flux, in part due to the ability of deep learning models to interpolate noisy training data and still perform appropriately on out-of-sample data, thereby contradicting long-held intuitions about the bias-variance trade-off in learning. We expand upon relevant existing work by discussing local attributes of neural network training within the context of a relatively simple framework. We describe how various types of noise can be compensated for within the proposed framework in order to allow the deep learning model to generalise in spite of interpolating spurious function descriptors. Empirically, we support our postulates with experiments involving overparameterised multilayer perceptrons and controlled training data noise. The main insights are that deep learning models are optimised for training data modularly, with different regions in the function space dedicated to fitting distinct types of sample information. Additionally, we show that models tend to fit uncorrupted samples first. Based on this finding, we propose a conjecture to explain an observed instance of the epoch-wise double-descent phenomenon. Our findings suggest that the notion of model capacity needs to be modified to consider the distributed way training data is fitted across sub-units.
@article{394, author = {Marthinus Theunissen and Marelie Davel and Etienne Barnard}, title = {Benign interpolation of noise in deep learning}, year = {2020}, journal = {South African Computer Journal}, volume = {32}, issue = {2}, pages = {80-101}, publisher = {South African Institute of Computer Scientists and Information Technologists}, issn = {1015-7999 (print); 2313-7835 (online)}, doi = {10.18489/sacj.v32i2.833}, }
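A minimal sketch of the controlled-noise setup such experiments rely on: corrupt a fixed fraction of training labels and remember which samples were corrupted, so that per-sample losses for the clean and noisy subsets can be tracked separately during training. The corruption fraction and class count below are illustrative assumptions.

import numpy as np

def corrupt_labels(y, frac=0.2, num_classes=10, seed=0):
    """Replace a random fraction of labels with random classes.

    Returns the corrupted label array and the indices of corrupted samples,
    so clean and noisy samples can be monitored separately.
    """
    rng = np.random.default_rng(seed)
    y = y.copy()
    idx = rng.choice(len(y), size=int(frac * len(y)), replace=False)
    y[idx] = rng.integers(0, num_classes, size=len(idx))  # spurious labels
    return y, idx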
Feedforward neural networks provide the basis for complex regression models that produce accurate predictions in a variety of applications. However, they generally do not explicitly provide any information about the utility of each of the input parameters in terms of their contribution to model accuracy. With this in mind, we develop the pairwise network, an adaptation of the fully connected feedforward network that allows the ranking of input parameters according to their contribution to the model output. The application is demonstrated in the context of a space physics problem. Geomagnetic storms are multi-day events characterised by significant perturbations to the magnetic field of the Earth, driven by solar activity. Previous storm forecasting efforts typically use solar wind measurements as input parameters to a regression problem tasked with predicting a perturbation index such as the 1-minute cadence symmetric-H (Sym-H) index. We revisit the task of predicting Sym-H from solar wind parameters, with two 'twists': (i) geomagnetic storm phase information is incorporated as model inputs and shown to increase prediction performance; (ii) we describe the pairwise network structure and training process, first validating ranking ability on synthetic data, before using the network to analyse the Sym-H problem.
@article{392, author = {Jacques Beukes and Marelie Davel and Stefan Lotz}, title = {Pairwise networks for feature ranking of a geomagnetic storm model}, year = {2020}, journal = {South African Computer Journal}, volume = {32}, issue = {2}, pages = {35-55}, publisher = {South African Institute of Computer Scientists and Information Technologists}, issn = {1015-7999 (print); 2313-7835 (online)}, doi = {10.18489/sacj.v32i2.860}, }
No framework exists that can explain and predict the generalisation ability of deep neural networks in general circumstances. In fact, this question has not been answered for some of the least complicated of neural network architectures: fully-connected feedforward networks with rectified linear activations and a limited number of hidden layers. For such an architecture, we show how adding a summary layer to the network makes it more amenable to analysis, and allows us to define the conditions that are required to guarantee that a set of samples will all be classified correctly. This process does not describe the generalisation behaviour of these networks, but produces a number of metrics that are useful for probing their learning and generalisation behaviour. We support the analytical conclusions with empirical results, both to confirm that the mathematical guarantees hold in practice, and to demonstrate the use of the analysis process.
@article{391, author = {Marelie Davel}, title = {Using summary layers to probe neural network behaviour}, year = {2020}, journal = {South African Computer Journal}, volume = {32}, issue = {2}, pages = {102-123}, publisher = {South African Institute of Computer Scientists and Information Technologists}, issn = {1015-7999 (print); 2313-7835 (online)}, url = {http://hdl.handle.net/10394/36916}, doi = {10.18489/sacj.v32i2.861}, }
In this paper I provide an exposition and critique of the Organic View of Ethical Status, as outlined by Torrance (2008). A key presupposition of this view is that only moral patients can be moral agents. It is claimed that because artificial agents lack sentience, they cannot be proper subjects of moral concern (i.e. moral patients). This account of moral standing in principle excludes machines from participating in our moral universe. I will argue that the Organic View operationalises anthropocentric intuitions regarding sentience ascription, and by extension how we identify moral patients. The main difference between the argument I provide here and traditional arguments surrounding moral attributability is that I do not necessarily defend the view that internal states ground our ascriptions of moral patiency. This is in contrast to views such as those defended by Singer (1975, 2011) and Torrance (2008), where concepts such as sentience play starring roles. I will raise both conceptual and epistemic issues with regard to this sense of sentience. While this does not preclude the usage of sentience outright, it suggests that we should be more careful in our usage of internal mental states to ground our moral ascriptions. Following from this, I suggest other avenues for further exploration into machine moral patiency which may not have the same shortcomings as the Organic View.
@article{387, author = {Fabio Tollon}, title = {The artificial view: toward a non-anthropocentric account of moral patiency}, year = {2020}, journal = {Ethics and Information Technology}, volume = {22}, issue = {4}, publisher = {Springer}, url = {https://link.springer.com/article/10.1007%2Fs10676-020-09540-4}, doi = {10.1007/s10676-020-09540-4}, }
This paper contributes to the debate in the ethics of social robots on how or whether to treat social robots morally by way of considering a novel perspective on the moral relations between human interactants and social robots. This perspective is significant as it allows us to circumnavigate debates about the (im)possibility of robot consciousness and moral patiency (debates which often slow down discussion on the ethics of HRI), thus allowing us to address actual and urgent current ethical issues in relation to human-robot interaction. The paper considers the different ways in which human interactants may be moral patients in the context of interaction with social robots: robots as conduits of human moral action towards human moral patients; humans as moral patients to the actions of robots; and human interactants as moral patients of their own agential moral actions towards social robots. This third perspective is the focal point of the paper. The argument is that due to perceived robot consciousness, and the possibility that the immoral treatment of social robots may morally harm human interactants, there is a unique moral relation between humans and social robots wherein human interactants are both the moral agents of their actions towards robots, as well as the actual moral patients of those agential moral actions towards robots. Robots, however, are no more than perceived moral patients. This discussion further adds to debates in the context of robot moral status, and the consideration of the moral treatment of robots in the context of human-robot interaction.
@article{385, author = {Cindy Friedman}, title = {Human-Robot Moral Relations: Human Interactants as Moral Patients of Their Own Agential Moral Actions Towards Robots}, year = {2020}, journal = {Communications in Computer and Information Science}, volume = {1342}, pages = {3-20}, publisher = {Springer}, isbn = {978-3-030-66151-9}, url = {https://link.springer.com/chapter/10.1007/978-3-030-66151-9_1}, doi = {10.1007/978-3-030-66151-9_1}, }
Given that AI ethics guidelines currently, on the whole, seem to have no significant impact on AI practices, the quest of AI ethics to ensure trustworthy AI is in danger of becoming nothing more than a nice ideal. Serious work needs to be done to ensure AI ethics guidelines are actionable. To this end, in this paper, I argue that AI ethics should be approached 1) in a multi-disciplinary manner focused on concrete research in the discipline of the ethics of AI and 2) as a dynamic system on the basis of virtue ethics, in order to work towards enabling all AI actors to take responsibility for their own actions and to hold others accountable for theirs. In conclusion, the paper emphasises the importance of understanding AI ethics as playing out on a continuum of interconnected interests across academia, civil society, public policy-making and the private sector, and a novel notion of ‘AI ethics capital’ is put on the table as an outcome of actionable AI ethics and an essential ingredient of sustainable trustworthy AI.
@article{384, author = {Emma Ruttkamp-Bloem}, title = {The Quest for Actionable AI Ethics}, year = {2020}, journal = {Communications in Computer and Information Science}, volume = {1342}, pages = {34-52}, publisher = {Springer}, isbn = {978-3-030-66151-9}, url = {https://link.springer.com/chapter/10.1007/978-3-030-66151-9_3}, doi = {10.1007/978-3-030-66151-9_3}, }
Propositional KLM-style defeasible reasoning involves a core propositional logic capable of expressing defeasible (or conditional) implications. The semantics for this logic is based on Kripke-like structures known as ranked interpretations. KLM-style defeasible entailment is referred to as rational whenever the defeasible entailment relation under consideration generates a set of defeasible implications all satisfying a set of rationality postulates known as the KLM postulates. In a recent paper Booth et al. proposed PTL, a logic that is more expressive than the core KLM logic. They proved an impossibility result, showing that defeasible entailment for PTL fails to satisfy a set of rationality postulates similar in spirit to the KLM postulates. Their interpretation of the impossibility result is that defeasible entailment for PTL need not be unique. In this paper we continue the line of research in which the expressivity of the core KLM logic is extended. We present the logic Boolean KLM (BKLM) in which we allow for disjunctions, conjunctions, and negations, but not nesting, of defeasible implications. Our contribution is twofold. Firstly, we show (perhaps surprisingly) that BKLM is more expressive than PTL. Our proof is based on the fact that BKLM can characterise all single ranked interpretations, whereas PTL cannot. Secondly, given that the PTL impossibility result also applies to BKLM, we adapt the different forms of PTL entailment proposed by Booth et al. to apply to BKLM.
@inproceedings{383, author = {Guy Paterson-Jones and Giovanni Casini and Tommie Meyer}, title = {BKLM - An expressive logic for defeasible reasoning}, year = {2020}, booktitle = {18th International Workshop on Non-Monotonic Reasoning}, month = {12/09/2020-24/09/2020}, }
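As background to the KLM-style semantics referenced above: in a ranked interpretation, each world is assigned an integer rank (lower means more typical), and a defeasible implication a |~ b is satisfied when every minimal-rank world satisfying a also satisfies b. The snippet below is a textbook-style illustration of that satisfaction check, not code from the paper; the world encoding is our own.

def defeasibly_satisfied(ranked_worlds, a, b):
    """ranked_worlds: iterable of (rank, world) pairs; a, b: predicates over worlds.
    Returns True if all minimal-rank a-worlds also satisfy b (KLM ranked semantics)."""
    a_worlds = [(r, w) for r, w in ranked_worlds if a(w)]
    if not a_worlds:
        return True  # no a-worlds: the implication holds vacuously
    min_rank = min(r for r, _ in a_worlds)
    return all(b(w) for r, w in a_worlds if r == min_rank)

# Example: birds typically fly, even though penguins (atypical birds) do not.
worlds = [(0, {"bird": True, "penguin": False, "flies": True}),
          (1, {"bird": True, "penguin": True, "flies": False})]
print(defeasibly_satisfied(worlds, lambda w: w["bird"], lambda w: w["flies"]))  # True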
We present a formal framework for modelling belief change within a non-monotonic reasoning system. Belief change and non-monotonic reasoning are two areas that are formally closely related, with recent attention being paid towards the analysis of belief change within a non-monotonic environment. In this paper we consider the classical AGM belief change operators, contraction and revision, applied to a defeasible setting in the style of Kraus, Lehmann, and Magidor. The investigation leads us to the formal characterisation of a number of classes of defeasible belief change operators. For the most interesting classes we need to consider the problem of iterated belief change, generalising the classical work of Darwiche and Pearl in the process. Our work involves belief change operators aimed at ensuring logical consistency, as well as the characterisation of analogous operators aimed at obtaining coherence—an important notion within the field of logic-based ontologies.
@inproceedings{382, author = {Giovanni Casini and Tommie Meyer and Ivan Varzinczak}, title = {Rational Defeasible Belief Change}, year = {2020}, booktitle = {17th International Conference on Principles of Knowledge Representation and Reasoning (KR 2020)}, pages = {213-222}, month = {12/09/2020}, publisher = {IJCAI}, address = {Virtual}, url = {https://library.confdna.com/kr/2020/}, doi = {10.24963/kr.2020/22}, }
Artificial Intelligence (AI) is already shaping our everyday lives. While there is enormous potential for harnessing AI to solve complex industrial and social problems and to create new and innovative products and solutions, many organisations are still grappling to understand the relevance and future impact of AI on their activities and what they should be doing about it.
@misc{381, author = {Deshen Moodley and Tommie Meyer}, title = {Artificial Intelligence – Where it is heading and what we should do about it}, year = {2020}, url = {https://link.springer.com/article/10.1007/s42354-020-0269-5}, }
Traditional cluster analysis metrics rank clustering structures in terms of the compactness and distinctness of clusters. However, in real-world applications this is usually insufficient for selecting the optimal clustering structure. Domain experts and visual analysis are often relied on during evaluation, which results in a selection process that tends to be ad hoc, subjective and difficult to reproduce. This work proposes the use of competency questions and a cluster scoring matrix to formalise expert knowledge and application requirements for qualitative evaluation of clustering structures. We show how a qualitative ranking of clustering structures can be integrated with traditional metrics to guide cluster evaluation and selection for generating representative energy consumption profiles that characterise residential electricity demand in South Africa. The approach is shown to be highly effective for identifying usable and expressive consumption profiles within this specific application context, and certainly has wider potential for efficient, transparent and repeatable cluster selection in real-world applications.
@inproceedings{380, author = {Wiebke Toussaint and Deshen Moodley}, title = {Identifying optimal clustering structures for residential energy consumption patterns using competency questions}, year = {2020}, booktitle = {SAICSIT '20: Conference of the South African Institute of Computer Scientists and Information Technologists 2020}, pages = {66-73}, month = {14/09/2020}, publisher = {ACM Digital Library}, address = {Virtual}, isbn = {978-1-4503-8847-4}, url = {https://dl.acm.org/doi/proceedings/10.1145/3410886}, }
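One way to picture the integration described above: each candidate clustering structure receives expert scores against competency questions, and those scores are combined with a traditional quantitative index into a single ranking. The weighting scheme below is invented for illustration and the scoring inputs are assumed placeholders; the paper's scoring matrix may differ.

import numpy as np
from sklearn.metrics import davies_bouldin_score

def rank_clusterings(X, candidate_labels, expert_scores, alpha=0.5):
    """Rank candidate clusterings by blending a quantitative index with
    qualitative expert scores.

    candidate_labels: list of cluster-label arrays, one per candidate.
    expert_scores:    list of mean competency-question scores in [0, 1].
    """
    combined = []
    for labels, q in zip(candidate_labels, expert_scores):
        db = davies_bouldin_score(X, labels)  # lower is better
        quant = 1.0 / (1.0 + db)              # map to (0, 1], higher is better
        combined.append(alpha * quant + (1 - alpha) * q)
    return np.argsort(combined)[::-1]         # indices, best candidate first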
Depth cameras provide a natural and intuitive user interaction mechanism in virtual reality environments by using hand gestures as the primary user input. However, building robust VR systems that use depth cameras is challenging. Gesture recognition accuracy is affected by occlusion, variation in hand orientation and misclassification of similar hand gestures. This research explores the limits of the Leap Motion depth camera for static hand pose recognition in virtual reality applications. We propose a system for analysing static hand poses and for systematically identifying a pose set that can achieve a near-perfect recognition accuracy. The system consists of a hand pose taxonomy, a pose notation, a machine learning classifier and an algorithm to identify a reliable pose set that can achieve near perfect accuracy levels. We used this system to construct a benchmark hand pose data set containing 2550 static hand pose instances, and show how the algorithm can be used to systematically derive a set of poses that can produce an accuracy of 99% using a Support Vector Machine classifier.
@inproceedings{379, author = {Andrew Clark and Anban Pillay and Deshen Moodley}, title = {A system for pose analysis and selection in virtual reality environments}, year = {2020}, booktitle = {SAICSIT '20: Conference of the South African Institute of Computer Scientists and Information Technologists 2020}, pages = {210-216}, month = {14/09/2020}, publisher = {ACM Digital Library}, address = {Virtual}, isbn = {978-1-4503-8847-4}, url = {https://dl.acm.org/doi/proceedings/10.1145/3410886}, }
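The classification step above is a standard supervised setup; a sketch with scikit-learn follows. The feature file paths are hypothetical placeholders for fixed-length pose descriptors (e.g. joint positions/angles) extracted upstream from the Leap Motion, and the kernel and C value are illustrative choices rather than the paper's settings.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Placeholder paths: (n_samples, n_features) pose descriptors and class labels.
X = np.load("hand_pose_features.npy")
y = np.load("hand_pose_labels.npy")

svm = SVC(kernel="rbf", C=10.0)
# Cross-validated accuracy estimate for the current candidate pose set.
print(cross_val_score(svm, X, y, cv=5).mean())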
Complex societal problems require a multi-disciplinary and multi-method approach to develop models that can support the development of solutions. General morphological analysis is a qualitative method to extract information from experts through facilitation and the use of customized software. Ontologies provide semantic representation of knowledge bases together with automated reasoning capabilities. These two approaches, combined with the use of concept maps, provide an integrated approach which can be used to understand complex and ill-structured problem domains and to aid in business modelling, strategy and scenario development and, finally, decision-making. The resulting models are subjective constructs reflecting the knowledge and understanding of the analysts. Subsequent synthesis of new understanding and decisions relies on the robust validation and verification of the underlying logic and assumptions of the conceptual models. Morphological Analysis and ontological constructs are applied in terms of an integrated Morphological Ontology Design Engineering methodology (MODE), which is based on Design Science. The paper is developed around the opportunity of scoping the applied research competence required to support a nation’s progress toward energy sufficiency. This paper presents a complex fused model for national energy sufficiency in New Zealand. The approach can be used to address other ill-structured complex societal problems.
@inproceedings{375, author = {JH Roodt and Louise Leenen and Jansen van Vuuren}, title = {Modelling of the Complex Societal Problem of Establishing a National Energy Sufficiency Competence}, year = {2020}, booktitle = {23rd International Conference on Information Fusion}, pages = {880-887}, month = {06/07-09/07}, isbn = {978-0-578-64709-8}, }
The protection and management of data, and especially personal information, is becoming an issue of critical importance in both the business environment and in general society. Various institutions have justifiable reasons to gather the personal information of individuals, but they are required to comply with any legislation involving the processing of such data. Organisations thus face legal and other repercussions should personal information be breached or treated negligently. Most countries have adopted privacy and data protection laws or are in the process of enacting such laws. In South Africa, the Protection of Personal Information Act (POPIA) was formally adopted in 2013 but is yet to be implemented. When the implementation of the Act is announced, role players (responsible parties and data subjects) affected by POPIA will have a grace period of a year to become compliant and/or understand how the Act will affect them. One example of a mandate that follows from POPIA is data breach notification. This paper presents the development of a prototype ontology on POPIA to promote transparency and education of affected data subjects and organisations, including government departments. The ontology provides a semantic representation of a knowledge base for the regulations in the POPIA and how it affects these role players. The POPIA is closely aligned with the European Union’s General Data Protection Regulation (GDPR), and the POPIA ontology is inspired by similar ontologies developed for the GDPR.
@inproceedings{374, author = {Y Jafta and Louise Leenen and P Chan}, title = {An Ontology for the South African Protection of Personal Information Act}, year = {2020}, booktitle = {The 19th European Conference on Cyber Warfare and Security}, pages = {158-176}, month = {25/06 - 26/06}, publisher = {Academic Conferences and Publishing International Limited}, address = {UK}, isbn = {978-1-912764-61-7}, }
In the recent past, some Internet users questioned the reliability of online news, but not necessarily the role of search engines in programming public discourse. In 2018, South African Twitter users accused Google of peddling misinformation when Google Image searches for the phrase “squatter camps in South Africa” displayed images of white squatter camps. Many analysts blamed Google’s algorithm for displaying bias. In this article, the authors use this example in comparing the findings of six different search engines to counter this argument. Search engines that are diverse in their scope and origin are used to prove that it is not the algorithm, but rather the data, that is biased.
@article{373, author = {Jansen van Vuuren and Louise Leenen}, title = {Proving It Is the Data That Is Biased, Not the Algorithm Through a Recent South African Online Case Study}, year = {2020}, journal = {Journal of Information Warfare}, volume = {19}, issue = {3}, pages = {118-129}, publisher = {Peregrine Technical Solutions}, address = {Virginia, USA}, issn = {1445-3312}, }
Cybercrime is increasing at a rate few individuals would have predicted. IBM estimated in 2016 that, in 2019, the cost of cybercrime would reach $2 trillion, a threefold increase from the 2015 estimate of $500 billion. The growth of the Internet and the rapid development of technology provide enormous economic and social benefits but at the same time provide platforms for cybercriminals to exploit. Organised crime is using more sophisticated techniques, which require highly skilled and specialised law enforcement responses. One example is the use of cryptocurrencies, which makes it easier for cybercriminals to hide their proceeds. Regulatory measures often lag behind. In this paper, the authors give an overview of the growing threat of cybercrime with a specific focus on high levels of cybercrime in Africa. The focus then turns to the development of national cybercrime strategies and implementation. Results from literature and the authors’ analyses of two cyber indices to measure the capabilities and capacities of countries are combined to present a framework for the development of a cybercrime strategy, and in particular, a strategy customised for African countries.
@article{372, author = {Jansen van Vuuren and Louise Leenen and P Pieterse}, title = {Development and Implementation of Cybercrime Strategies in Africa with Specific Reference to South Africa}, year = {2020}, journal = {Journal of Information Warfare}, volume = {19}, issue = {3}, pages = {83-101}, publisher = {Peregrine Technical Solutions}, address = {Virginia, USA}, issn = {1445-3312}, }
Cybersecurity is often incorrectly assumed to be a purely technical field; however, it has numerous multidisciplinary aspects, such as human factors, legal, and governance issues. The broad scope, combined with other historical or bureaucratic factors, can create challenges for researchers and students where appropriate methodologies do not necessarily conform to traditional disciplinary norms; prejudice against research approaches can occur as a result of ‘old school thought’. This paper aims to investigate the South African national and institutional perspectives on higher education and research, identify challenges, and propose solutions to facilitate multidisciplinary research into cybersecurity and Information Warfare (IW) in South Africa.
@article{371, author = {T Ramluckan and B van Niekerk and Louise Leenen}, title = {Cybersecurity and Information Warfare Research in South Africa: Challenges and Proposed Solutions}, year = {2020}, journal = {Journal of Information Warfare}, volume = {19}, issue = {1}, pages = {80-95}, publisher = {Peregrine Technical Solutions}, address = {Virginia, USA}, issn = {1445-3312}, }
A robust theoretical framework that can describe and predict the generalization ability of deep neural networks (DNNs) in general circumstances remains elusive. Classical attempts have produced complexity metrics that rely heavily on global measures of compactness and capacity with little investigation into the effects of sub-component collaboration. We demonstrate intriguing regularities in the activation patterns of the hidden nodes within fully-connected feedforward networks. By tracing the origin of these patterns, we show how such networks can be viewed as the combination of two information processing systems: one continuous and one discrete. We describe how these two systems arise naturally from the gradient-based optimization process, and demonstrate the classification ability of the two systems, individually and in collaboration. This perspective on DNN classification offers a novel way to think about generalization, in which different subsets of the training data are used to train distinct classifiers; those classifiers are then combined to perform the classification task, and their consistency is crucial for accurate classification.
@inproceedings{236, author = {Marelie Davel and Marthinus Theunissen and Arnold Pretorius and Etienne Barnard}, title = {DNNs as layers of cooperating classifiers}, year = {2020}, booktitle = {The Thirty-Fourth AAAI Conference on Artificial Intelligence (AAAI-20)}, pages = {3725-3732}, month = {07/02-12/02/2020}, address = {New York}, }
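The "discrete system" mentioned above can be made concrete for a ReLU network: each hidden node is either active or inactive for a given input, so every sample induces a binary activation pattern per layer. A small PyTorch sketch follows; the model shape and names are our own illustration, not the paper's code.

import torch
import torch.nn as nn

# Illustrative ReLU MLP; any fully-connected feedforward network would do.
mlp = nn.Sequential(nn.Linear(784, 256), nn.ReLU(),
                    nn.Linear(256, 128), nn.ReLU(),
                    nn.Linear(128, 10))

def activation_patterns(model, x):
    """Return one binary on/off tensor per ReLU layer for input x."""
    patterns = []
    h = x
    for layer in model:
        h = layer(h)
        if isinstance(layer, nn.ReLU):
            patterns.append((h > 0).int())  # the discrete code for this layer
    return patterns

x = torch.randn(1, 784)
print([p.shape for p in activation_patterns(mlp, x)])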
We present the design and development of a South African directory enquiries corpus. It contains audio and orthographic transcriptions of a wide range of South African names produced by first-language speakers of four languages, namely Afrikaans, English, isiZulu and Sesotho. Useful as a resource to understand the effect of name language and speaker language on pronunciation, this is the first corpus to also aim to identify the “intended language”: an implicit assumption with regard to word origin made by the speaker of the name. We describe the design, collection, annotation, and verification of the corpus. This includes an analysis of the algorithms used to tag the corpus with meta information that may be beneficial to pronunciation modelling tasks.
@article{280, author = {Jan Thirion and Charl Van Heerden and Oluwapelumi Giwa and Marelie Davel}, title = {The South African directory enquiries (SADE) name corpus}, year = {2020}, journal = {Language Resources & Evaluation}, volume = {54}, issue = {1}, pages = {155-184}, publisher = {Springer}, doi = {10.1007/s10579-019-09448-6}, }
2019
No Abstract
@inproceedings{368, author = {Stefan Lotz and Jacques Beukes and Marelie Davel}, title = {A neural network based method for input parameter selection (Poster)}, year = {2019}, booktitle = {Machine Learning in Heliophysics}, address = {Amsterdam, The Netherlands}, }