People

Latest Research Publications:

Latest Research Publications:

Latest Research Publications:
Building computational models of agents in dynamic, partially observable and stochastic environments is challenging. We propose a cognitive computational model of sugarcane growers’ daily decision-making to examine sugarcane supply chain complexities. Growers make decisions based on uncertain weather forecasts; cane dryness; unforeseen emergencies; and the mill’s unexpected call for delivery of a different amount of cane. The Belief-Desire-Intention (BDI) architecture has been used to model cognitive agents in many domains, including agriculture. However, typical implementations of this architecture have represented beliefs symbolically, so uncertain beliefs are usually not catered for. Here we show that a BDI architecture, enhanced with a dynamic decision network (DDN), suitably models sugarcane grower agents’ repeated daily decisions. Using two complex scenarios, we demonstrate that the agent selects the appropriate intention, and suggests how the grower should act adaptively and proactively to achieve his goals. In addition, we provide a mapping for using a DDN in a BDI architecture. This architecture can be used for modelling sugarcane grower agents in an agent-based simulation. The mapping of the DDN’s use in the BDI architecture enables this work to be applied to other domains for modelling agents’ repeated decisions in partially observable, stochastic and dynamic environments.
@comment{Journal article in the South African Computer Journal, volume 34; the journal issue is stored in the number field.}
@article{488,
  author   = {Price, C. Sue and Moodley, Deshen and Pillay, Anban and Rens, Gavin},
  title    = {An adaptive probabilistic agent architecture for modelling sugarcane growers’ decision-making},
  abstract = {Building computational models of agents in dynamic, partially observable and stochastic environments is challenging. We propose a cognitive computational model of sugarcane growers’ daily decision-making to examine sugarcane supply chain complexities. Growers make decisions based on uncertain weather forecasts; cane dryness; unforeseen emergencies; and the mill’s unexpected call for delivery of a different amount of cane. The Belief-Desire-Intention (BDI) architecture has been used to model cognitive agents in many domains, including agriculture. However, typical implementations of this architecture have represented beliefs symbolically, so uncertain beliefs are usually not catered for. Here we show that a BDI architecture, enhanced with a dynamic decision network (DDN), suitably models sugarcane grower agents’ repeated daily decisions. Using two complex scenarios, we demonstrate that the agent selects the appropriate intention, and suggests how the grower should act adaptively and proactively to achieve his goals. In addition, we provide a mapping for using a DDN in a BDI architecture. This architecture can be used for modelling sugarcane grower agents in an agent-based simulation. The mapping of the DDN’s use in the BDI architecture enables this work to be applied to other domains for modelling agents’ repeated decisions in partially observable, stochastic and dynamic environments.},
  journal  = {South African Computer Journal},
  year     = {2022},
  volume   = {34},
  number   = {1},
  pages    = {152--191},
  url      = {https://sacj.cs.uct.ac.za/index.php/sacj/article/view/857},
  doi      = {10.18489/sacj.v34i1.857},
}
We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61% and an F1 score of 88.68%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual
@comment{Conference paper in the ICT4AWE proceedings. The original record stored INSTICC in the address field; it is an organisation name, not a city, so it is kept as organization -- NOTE(review): confirm its role.}
@inproceedings{478,
  author       = {Wanyana, Tezira and Nzomo, Mbithe and Price, C. Sue and Moodley, Deshen},
  title        = {Combining Machine Learning and {Bayesian} Networks for {ECG} Interpretation and Explanation},
  abstract     = {We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61\% and an F1 score of 88.68\%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual},
  booktitle    = {Proceedings of the 8th International Conference on Information and Communication Technologies for Ageing Well and e-Health - {ICT4AWE}},
  year         = {2022},
  pages        = {81--92},
  publisher    = {SciTePress},
  organization = {INSTICC},
  isbn         = {978-989-758-566-1},
  doi          = {10.5220/0011046100003188},
}
Stock markets are dynamic systems that exhibit complex intra-share and inter-share temporal dependencies. Spatial-temporal graph neural networks (ST-GNN) are emerging DNN architectures that have yielded high performance for flow prediction in dynamic systems with complex spatial and temporal dependencies such as city traffic networks. In this research, we apply three state-of-the-art ST-GNN architectures, i.e. Graph WaveNet, MTGNN and StemGNN, to predict the closing price of shares listed on the Johannesburg Stock Exchange (JSE) and attempt to capture complex inter-share dependencies. The results show that ST-GNN architectures, specifically Graph WaveNet, produce superior performance relative to an LSTM and are potentially capable of capturing complex intra-share and inter-share temporal dependencies in the JSE. We found that Graph WaveNet outperforms the other approaches over short-term and medium-term horizons. This work is one of the first studies to apply these ST-GNNs to share price prediction.
@comment{Conference paper in a Springer proceedings volume (series volume 1551). The booktitle is taken from other records of the same volume in this file -- NOTE(review): confirm against the publisher page.}
@inproceedings{443,
  author    = {Pillay, Kialan and Moodley, Deshen},
  title     = {Exploring Graph Neural Networks for Stock Market Prediction on the {JSE}},
  abstract  = {Stock markets are dynamic systems that exhibit complex intra-share and inter-share temporal dependencies. Spatial-temporal graph neural networks (ST-GNN) are emerging DNN architectures that have yielded high performance for flow prediction in dynamic systems with complex spatial and temporal dependencies such as city traffic networks. In this research, we apply three state-of-the-art ST-GNN architectures, i.e. Graph WaveNet, MTGNN and StemGNN, to predict the closing price of shares listed on the Johannesburg Stock Exchange (JSE) and attempt to capture complex inter-share dependencies. The results show that ST-GNN architectures, specifically Graph WaveNet, produce superior performance relative to an LSTM and are potentially capable of capturing complex intra-share and inter-share temporal dependencies in the JSE. We found that Graph WaveNet outperforms the other approaches over short-term and medium-term horizons. This work is one of the first studies to apply these ST-GNNs to share price prediction.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2021},
  series    = {Communications in Computer and Information Science},
  volume    = {1551},
  pages     = {95--110},
  publisher = {Springer},
  address   = {Cham},
  year      = {2022},
  isbn      = {978-3-030-95070-5},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-95070-5_7},
  doi       = {10.1007/978-3-030-95070-5_7},
}
This research proposes an architecture and prototype implementation of a knowledge-based system for automating share evaluation and investment decision making on the Johannesburg Stock Exchange (JSE). The knowledge acquired from an analysis of the investment domain for a value investing approach is represented in an ontology. A Bayesian network, developed using the ontology, is used to capture the complex causal relations between different factors that influence the quality and value of individual shares. The system was found to adequately represent the decision-making process of investment professionals and provided superior returns to selected benchmark JSE indices from 2012 to 2018.
@comment{Conference paper; the original record had no entry type. The event dates (read as day/month/year) are kept in eventdate and the location in venue. NOTE(review): the booktitle says SACAIR 2022 but the event dates fall in December 2021 -- confirm the conference year. The url is a mail-gateway redirect; replace it with the canonical link when known.}
@inproceedings{442,
  author    = {Drake, Rachel and Moodley, Deshen},
  title     = {{INVEST}: Ontology Driven {Bayesian} Networks for Investment Decision Making on the {JSE}},
  abstract  = {This research proposes an architecture and prototype implementation of a knowledge-based system for automating share evaluation and investment decision making on the Johannesburg Stock Exchange (JSE). The knowledge acquired from an analysis of the investment domain for a value investing approach is represented in an ontology. A Bayesian network, developed using the ontology, is used to capture the complex causal relations between different factors that influence the quality and value of individual shares. The system was found to adequately represent the decision-making process of investment professionals and provided superior returns to selected benchmark JSE indices from 2012 to 2018.},
  booktitle = {Second Southern African Conference for AI Research (SACAIR 2022)},
  year      = {2022},
  pages     = {252--273},
  eventdate = {2021-12-06/2021-12-10},
  venue     = {Online},
  isbn      = {978-0-620-94410-6},
  url       = {https://protect-za.mimecast.com/s/OFYSCpgo02fL1l9gtDHUkY},
}
The abductive theory of method (ATOM) was recently proposed to describe the process that scientists use for knowledge discovery. In this paper we propose an agent architecture for knowledge discovery and evolution (KDE) based on ATOM. The agent incorporates a combination of ontologies, rules and Bayesian networks for representing different aspects of its internal knowledge. The agent uses an external AI service to detect unexpected situations from incoming observations. It then uses rules to analyse the current situation and a Bayesian network for finding plausible explanations for unexpected situations. The architecture is evaluated and analysed on a use case application for monitoring daily household electricity consumption patterns.
@comment{Conference paper in the KI 2021 proceedings. The original record stored the volume number (12873) in the edition field; it is kept as volume here.}
@inproceedings{425,
  author    = {Wanyana, Tezira and Moodley, Deshen},
  title     = {An Agent Architecture for Knowledge Discovery and Evolution},
  abstract  = {The abductive theory of method (ATOM) was recently proposed to describe the process that scientists use for knowledge discovery. In this paper we propose an agent architecture for knowledge discovery and evolution (KDE) based on ATOM. The agent incorporates a combination of ontologies, rules and Bayesian networks for representing different aspects of its internal knowledge. The agent uses an external AI service to detect unexpected situations from incoming observations. It then uses rules to analyse the current situation and a Bayesian network for finding plausible explanations for unexpected situations. The architecture is evaluated and analysed on a use case application for monitoring daily household electricity consumption patterns.},
  booktitle = {{KI} 2021: Advances in Artificial Intelligence},
  year      = {2021},
  volume    = {12873},
  pages     = {241--256},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-030-87626-5},
  doi       = {10.1007/978-3-030-87626-5_18},
}
Latest Research Publications:
Explanation services are a crucial aspect of symbolic reasoning systems but they have not been explored in detail for defeasible formalisms such as KLM. We evaluate prior work on the topic with a focus on KLM propositional logic and find that a form of defeasible explanation initially described for Rational Closure which we term weak justification can be adapted to Relevant and Lexicographic Closure as well as described in terms of intuitive properties derived from the KLM postulates. We also consider how a more general definition of defeasible explanation known as strong explanation applies to KLM and propose an algorithm that enumerates these justifications for Rational Closure.
@comment{Conference paper in the SACAIR 2021 proceedings. The original record stored 1551 in the edition field; it is treated as the series volume, matching other records of this volume in the file. The url points to the whole book, not the chapter.}
@inproceedings{426,
  author    = {Everett, Lloyd and Morris, Emily and Meyer, Tommie},
  title     = {Explanation for {KLM}-Style Defeasible Reasoning},
  abstract  = {Explanation services are a crucial aspect of symbolic reasoning systems but they have not been explored in detail for defeasible formalisms such as KLM. We evaluate prior work on the topic with a focus on KLM propositional logic and find that a form of defeasible explanation initially described for Rational Closure which we term weak justification can be adapted to Relevant and Lexicographic Closure as well as described in terms of intuitive properties derived from the KLM postulates. We also consider how a more general definition of defeasible explanation known as strong explanation applies to KLM and propose an algorithm that enumerates these justifications for Rational Closure.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2021},
  series    = {Communications in Computer and Information Science},
  volume    = {1551},
  year      = {2022},
  publisher = {Springer},
  address   = {Cham},
  isbn      = {978-3-030-95069-9},
  url       = {https://link.springer.com/book/10.1007/978-3-030-95070-5},
  doi       = {10.1007/978-3-030-95070-5_13},
}

Latest Research Publications:
While deep neural networks (DNNs) have become a standard architecture for many machine learning tasks, their internal decision-making process and general interpretability is still poorly understood. Conversely, common decision trees are easily interpretable and theoretically well understood. We show that by encoding the discrete sample activation values of nodes as a binary representation, we are able to extract a decision tree explaining the classification procedure of each layer in a ReLU-activated multilayer perceptron (MLP). We then combine these decision trees with existing feature attribution techniques in order to produce an interpretation of each layer of a model. Finally, we provide an analysis of the generated interpretations, the behaviour of the binary encodings and how these relate to sample groupings created during the training process of the neural network.
@comment{Conference paper in the SACAIR 2021 proceedings (series volume 1551). The original record stored the book title in the publisher field; Springer is taken from other records of the same volume in this file -- NOTE(review): confirm.}
@inproceedings{479,
  author    = {Mouton, Coenraad and Davel, Marelie},
  title     = {Exploring layerwise decision making in {DNNs}},
  abstract  = {While deep neural networks (DNNs) have become a standard architecture for many machine learning tasks, their internal decision-making process and general interpretability is still poorly understood. Conversely, common decision trees are easily interpretable and theoretically well understood. We show that by encoding the discrete sample activation values of nodes as a binary representation, we are able to extract a decision tree explaining the classification procedure of each layer in a ReLU-activated multilayer perceptron (MLP). We then combine these decision trees with existing feature attribution techniques in order to produce an interpretation of each layer of a model. Finally, we provide an analysis of the generated interpretations, the behaviour of the binary encodings and how these relate to sample groupings created during the training process of the neural network.},
  booktitle = {Artificial Intelligence Research ({SACAIR} 2021)},
  series    = {Communications in Computer and Information Science},
  volume    = {1551},
  pages     = {140--155},
  publisher = {Springer},
  year      = {2022},
  doi       = {10.1007/978-3-030-95070-5_10},
}
Although Convolutional Neural Networks (CNNs) are widely used, their translation invariance (ability to deal with translated inputs) is still subject to some controversy. We explore this question using translation-sensitivity maps to quantify how sensitive a standard CNN is to a translated input. We propose the use of cosine similarity as sensitivity metric over Euclidean distance, and discuss the importance of restricting the dimensionality of either of these metrics when comparing architectures. Our main focus is to investigate the effect of different architectural components of a standard CNN on that network’s sensitivity to translation. By varying convolutional kernel sizes and amounts of zero padding, we control the size of the feature maps produced, allowing us to quantify the extent to which these elements influence translation invariance. We also measure translation invariance at different locations within the CNN to determine the extent to which convolutional and fully connected layers, respectively, contribute to the translation invariance of a CNN as a whole. Our analysis indicates that both convolutional kernel size and feature map size have a systematic influence on translation invariance. We also see that convolutional layers contribute less than expected to translation invariance, when not specifically forced to do so.
@comment{Conference paper (series volume 1342). The original record stored the conference name in the publisher field; it is used as booktitle here. Springer is assumed as publisher because it publishes this series elsewhere in the file -- NOTE(review): confirm.}
@inproceedings{485,
  author    = {Myburgh, Johannes and Mouton, Coenraad and Davel, Marelie},
  title     = {Tracking translation invariance in {CNNs}},
  abstract  = {Although Convolutional Neural Networks (CNNs) are widely used, their translation invariance (ability to deal with translated inputs) is still subject to some controversy. We explore this question using translation-sensitivity maps to quantify how sensitive a standard CNN is to a translated input. We propose the use of cosine similarity as sensitivity metric over Euclidean distance, and discuss the importance of restricting the dimensionality of either of these metrics when comparing architectures. Our main focus is to investigate the effect of different architectural components of a standard CNN on that network’s sensitivity to translation. By varying convolutional kernel sizes and amounts of zero padding, we control the size of the feature maps produced, allowing us to quantify the extent to which these elements influence translation invariance. We also measure translation invariance at different locations within the CNN to determine the extent to which convolutional and fully connected layers, respectively, contribute to the translation invariance of a CNN as a whole. Our analysis indicates that both convolutional kernel size and feature map size have a systematic influence on translation invariance. We also see that convolutional layers contribute less than expected to translation invariance, when not specifically forced to do so.},
  booktitle = {Southern African Conference for Artificial Intelligence Research},
  series    = {Communications in Computer and Information Science},
  volume    = {1342},
  pages     = {282--295},
  publisher = {Springer},
  year      = {2020},
  isbn      = {978-3-030-66151-9},
  doi       = {10.1007/978-3-030-66151-9_18},
}
Convolutional Neural Networks have become the standard for image classification tasks, however, these architectures are not invariant to translations of the input image. This lack of invariance is attributed to the use of stride which subsamples the input, resulting in a loss of information, and fully connected layers which lack spatial reasoning. We show that stride can greatly benefit translation invariance given that it is combined with sufficient similarity between neighbouring pixels, a characteristic which we refer to as local homogeneity. We also observe that this characteristic is dataset-specific and dictates the relationship between pooling kernel size and stride required for translation invariance. Furthermore we find that a trade-off exists between generalization and translation invariance in the case of pooling kernel size, as larger kernel sizes lead to better invariance but poorer generalization. Finally we explore the efficacy of other solutions proposed, namely global average pooling, anti-aliasing, and data augmentation, both empirically and through the lens of local homogeneity.
@comment{Conference paper (series volume 1342). The original record stored the conference name in the publisher field and South Africa in address; they are kept as booktitle and venue. Springer is assumed as publisher because it publishes this series elsewhere in the file -- NOTE(review): confirm.}
@inproceedings{486,
  author    = {Mouton, Coenraad and Myburgh, Johannes and Davel, Marelie},
  title     = {Stride and translation invariance in {CNNs}},
  abstract  = {Convolutional Neural Networks have become the standard for image classification tasks, however, these architectures are not invariant to translations of the input image. This lack of invariance is attributed to the use of stride which subsamples the input, resulting in a loss of information, and fully connected layers which lack spatial reasoning. We show that stride can greatly benefit translation invariance given that it is combined with sufficient similarity between neighbouring pixels, a characteristic which we refer to as local homogeneity. We also observe that this characteristic is dataset-specific and dictates the relationship between pooling kernel size and stride required for translation invariance. Furthermore we find that a trade-off exists between generalization and translation invariance in the case of pooling kernel size, as larger kernel sizes lead to better invariance but poorer generalization. Finally we explore the efficacy of other solutions proposed, namely global average pooling, anti-aliasing, and data augmentation, both empirically and through the lens of local homogeneity.},
  booktitle = {Southern African Conference for Artificial Intelligence Research},
  series    = {Communications in Computer and Information Science},
  volume    = {1342},
  pages     = {267--281},
  publisher = {Springer},
  venue     = {South Africa},
  year      = {2020},
  isbn      = {978-3-030-66151-9},
  doi       = {10.1007/978-3-030-66151-9_17},
}

“The Role of the Board in Artificial Intelligence Ethics and Governance – A Case
for JSE Listed Companies”
Workshops attended:
1) 23 March 2021 Catalysing cooperation: Working Together Across AI
Governance Initiatives
2) 13 April 2021 ICGAI: Meaningful Inclusivity in Governing the AI Revolution
3) 19 May 2021 Digital Insight: Bridging the Trust Gap – How to Govern AI
4) 17 June 2021 Intelligent Decisions powered by AI: A critical tool for Digital
Government
5) 23 June 2021 Artificial Intelligence: How Secure are your ML and AI projects and
How human bias limits
6) 23 September 2021 Applying AI to tackle the Climate Crisis
Publication:
Who is responsible? AI vs corporate governance and SA law
https://www.bizcommunity.com/Article/196/547/208888.html
Latest Research Publications:
Latest Research Publications:

Latest Research Publications:

Latest Research Publications:
We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61% and an F1 score of 88.68%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual
@comment{Conference paper in the ICT4AWE proceedings. NOTE(review): the key 478 also appears earlier in this file, and BibTeX reports repeated keys -- keep only one copy when this listing is turned into a bibliography database. The original record stored INSTICC in the address field; it is an organisation name, not a city, so it is kept as organization -- confirm its role.}
@inproceedings{478,
  author       = {Wanyana, Tezira and Nzomo, Mbithe and Price, C. Sue and Moodley, Deshen},
  title        = {Combining Machine Learning and {Bayesian} Networks for {ECG} Interpretation and Explanation},
  abstract     = {We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61\% and an F1 score of 88.68\%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual},
  booktitle    = {Proceedings of the 8th International Conference on Information and Communication Technologies for Ageing Well and e-Health - {ICT4AWE}},
  year         = {2022},
  pages        = {81--92},
  publisher    = {SciTePress},
  organization = {INSTICC},
  isbn         = {978-989-758-566-1},
  doi          = {10.5220/0011046100003188},
}
Latest Research Publications: