People
Latest Research Publications:
Explanation services are a crucial aspect of symbolic reasoning systems but they have not been explored in detail for defeasible formalisms such as KLM. We evaluate prior work on the topic with a focus on KLM propositional logic and find that a form of defeasible explanation initially described for Rational Closure which we term weak justification can be adapted to Relevant and Lexicographic Closure as well as described in terms of intuitive properties derived from the KLM postulates. We also consider how a more general definition of defeasible explanation known as strong explanation applies to KLM and propose an algorithm that enumerates these justifications for Rational Closure.
@inproceedings{426,
  author    = {Everett, Lloyd and Morris, Emily and Meyer, Tommie},
  title     = {Explanation for {KLM}-Style Defeasible Reasoning},
  abstract  = {Explanation services are a crucial aspect of symbolic reasoning systems but they have not been explored in detail for defeasible formalisms such as KLM. We evaluate prior work on the topic with a focus on KLM propositional logic and find that a form of defeasible explanation initially described for Rational Closure which we term weak justification can be adapted to Relevant and Lexicographic Closure as well as described in terms of intuitive properties derived from the KLM postulates. We also consider how a more general definition of defeasible explanation known as strong explanation applies to KLM and propose an algorithm that enumerates these justifications for Rational Closure.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2021},
  series    = {Communications in Computer and Information Science},
  volume    = {1551},
  publisher = {Springer},
  address   = {Cham},
  year      = {2022},
  isbn      = {978-3-030-95069-9},
  url       = {https://link.springer.com/book/10.1007/978-3-030-95070-5},
  doi       = {10.1007/978-3-030-95070-5_13},
}
Latest Research Publications:
@inproceedings{520,
  author    = {Brooks, William and Davel, Marelie and Mouton, Coenraad},
  title     = {Does Simple Trump Complex? {Comparing} Strategies for Adversarial Robustness in {DNNs}},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2024},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {253--269},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  month     = dec,
  doi       = {10.1007/978-3-031-78255-8_15},
}
<p>Batch normalization (BatchNorm) is a popular layer normalization technique used when training deep neural networks. It has been shown to enhance the training speed and accuracy of deep learning models. However, the mechanics by which BatchNorm achieves these benefits is an active area of research, and different perspectives have been proposed. In this paper, we investigate the effect of BatchNorm on the resulting hidden representations, that is, the vectors of activation values formed as samples are processed at each hidden layer. Specifically, we consider the sparsity of these representations, as well as their implicit clustering – the creation of groups of representations that are similar to some extent. We contrast image classification models trained with and without batch normalization and highlight consistent differences observed. These findings highlight that BatchNorm’s effect on representational sparsity is not a significant factor affecting generalization, while the representations of models trained with BatchNorm tend to show more advantageous clustering characteristics.</p>
@inproceedings{518,
  author    = {Potgieter, Harmen and Mouton, Coenraad and Davel, Marelie},
  title     = {Impact of Batch Normalization on Convolutional Network Representations},
  abstract  = {Batch normalization (BatchNorm) is a popular layer normalization technique used when training deep neural networks. It has been shown to enhance the training speed and accuracy of deep learning models. However, the mechanics by which BatchNorm achieves these benefits is an active area of research, and different perspectives have been proposed. In this paper, we investigate the effect of BatchNorm on the resulting hidden representations, that is, the vectors of activation values formed as samples are processed at each hidden layer. Specifically, we consider the sparsity of these representations, as well as their implicit clustering – the creation of groups of representations that are similar to some extent. We contrast image classification models trained with and without batch normalization and highlight consistent differences observed. These findings highlight that BatchNorm’s effect on representational sparsity is not a significant factor affecting generalization, while the representations of models trained with BatchNorm tend to show more advantageous clustering characteristics.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2024},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {235--252},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  month     = dec,
  doi       = {10.1007/978-3-031-78255-8_14},
}
<p>It has been observed that the input space of deep neural network classifiers can exhibit ‘fragmentation’, where the model function rapidly changes class as the input space is traversed. The severity of this fragmentation tends to follow the double descent curve, achieving a maximum at the interpolation regime. We study this phenomenon in the context of image classification and ask whether fragmentation could be predictive of generalization performance. Using a fragmentation-based complexity measure, we show this to be possible by achieving good performance on the PGDL (Predicting Generalization in Deep Learning) benchmark. In addition, we report on new observations related to fragmentation, namely (i) fragmentation is not limited to the input space but occurs in the hidden representations as well, (ii) fragmentation follows the trends in the validation error throughout training, and (iii) fragmentation is not a direct result of increased weight norms. Together, this indicates that fragmentation is a phenomenon worth investigating further when studying the generalization ability of deep neural networks.</p>
@inproceedings{514,
  author    = {Mouton, Coenraad and Rabe, Randle and Haasbroek, Dani{\"e}l and Theunissen, Marthinus and Potgieter, Harmen and Davel, Marelie},
  title     = {Is network fragmentation a useful complexity measure?},
  abstract  = {It has been observed that the input space of deep neural network classifiers can exhibit ‘fragmentation’, where the model function rapidly changes class as the input space is traversed. The severity of this fragmentation tends to follow the double descent curve, achieving a maximum at the interpolation regime. We study this phenomenon in the context of image classification and ask whether fragmentation could be predictive of generalization performance. Using a fragmentation-based complexity measure, we show this to be possible by achieving good performance on the PGDL (Predicting Generalization in Deep Learning) benchmark. In addition, we report on new observations related to fragmentation, namely (i) fragmentation is not limited to the input space but occurs in the hidden representations as well, (ii) fragmentation follows the trends in the validation error throughout training, and (iii) fragmentation is not a direct result of increased weight norms. Together, this indicates that fragmentation is a phenomenon worth investigating further when studying the generalization ability of deep neural networks.},
  booktitle = {{NeurIPS} 2024 Workshop {SciForDL}},
  year      = {2024},
  month     = dec,
}
<p>Understanding generalization in deep neural networks is an active area of research. A promising avenue of exploration has been that of margin measurements: the shortest distance to the decision boundary for a given sample or its representation internal to the network. While margins have been shown to be correlated with the generalization ability of a model when measured at its hidden representations (hidden margins), no such link between large margins and generalization has been established for input margins. We show that while input margins are not generally predictive of generalization, they can be if the search space is appropriately constrained. We develop such a measure based on input margins, which we refer to as ‘constrained margins’. The predictive power of this new measure is demonstrated on the ‘Predicting Generalization in Deep Learning’ (PGDL) dataset and contrasted with hidden representation margins. We find that constrained margins achieve highly competitive scores and outperform other margin measurements in general. This provides a novel insight on the relationship between generalization and classification margins, and highlights the importance of considering the data manifold for investigations of generalization in DNNs.</p>
@inproceedings{512,
  author    = {Mouton, Coenraad and Theunissen, Marthinus and Davel, Marelie},
  title     = {Input margins can predict generalization too},
  abstract  = {Understanding generalization in deep neural networks is an active area of research. A promising avenue of exploration has been that of margin measurements: the shortest distance to the decision boundary for a given sample or its representation internal to the network. While margins have been shown to be correlated with the generalization ability of a model when measured at its hidden representations (hidden margins), no such link between large margins and generalization has been established for input margins. We show that while input margins are not generally predictive of generalization, they can be if the search space is appropriately constrained. We develop such a measure based on input margins, which we refer to as ‘constrained margins’. The predictive power of this new measure is demonstrated on the ‘Predicting Generalization in Deep Learning’ (PGDL) dataset and contrasted with hidden representation margins. We find that constrained margins achieve highly competitive scores and outperform other margin measurements in general. This provides a novel insight on the relationship between generalization and classification margins, and highlights the importance of considering the data manifold for investigations of generalization in DNNs.},
  booktitle = {Proceedings of the Thirty-Eighth {AAAI} Conference on Artificial Intelligence and Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence and Fourteenth Symposium on Educational Advances in Artificial Intelligence ({AAAI}'24/{IAAI}'24/{EAAI}'24)},
  pages     = {14379--14387},
  publisher = {{AAAI} Press},
  year      = {2024},
  month     = feb,
}
Classification margins are commonly used to estimate the generalization ability of machine learning models. We present an empirical study of these margins in artificial neural networks. A global estimate of margin size is usually used in the literature. In this work, we point out seldom considered nuances regarding classification margins. Notably, we demonstrate that some types of training samples are modelled with consistently small margins while affecting generalization in different ways. By showing a link with the minimum distance to a different-target sample and the remoteness of samples from one another, we provide a plausible explanation for this observation. We support our findings with an analysis of fully-connected networks trained on noise-corrupted MNIST data, as well as convolutional networks trained on noise-corrupted CIFAR10 data.
@inproceedings{505,
  author    = {Theunissen, Marthinus and Mouton, Coenraad and Davel, Marelie},
  title     = {The Missing Margin: How Sample Corruption Affects Distance to the Boundary in {ANNs}},
  abstract  = {Classification margins are commonly used to estimate the generalization ability of machine learning models. We present an empirical study of these margins in artificial neural networks. A global estimate of margin size is usually used in the literature. In this work, we point out seldom considered nuances regarding classification margins. Notably, we demonstrate that some types of training samples are modelled with consistently small margins while affecting generalization in different ways. By showing a link with the minimum distance to a different-target sample and the remoteness of samples from one another, we provide a plausible explanation for this observation. We support our findings with an analysis of fully-connected networks trained on noise-corrupted MNIST data, as well as convolutional networks trained on noise-corrupted CIFAR10 data.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2022},
  series    = {Communications in Computer and Information Science},
  volume    = {1734},
  pages     = {78--92},
  publisher = {Springer},
  address   = {Cham},
  year      = {2022},
  month     = nov,
  doi       = {10.48550/arXiv.2302.06925},
}
“The Role of the Board in Artificial Intelligence Ethics and Governance – A Case
for JSE Listed Companies”
Workshops attended:
1) 23 March 2021 Catalysing cooperation: Working Together Across AI
Governance Initiatives
2) 13 April 2021 ICGAI: Meaningful Inclusivity in Governing the AI Revolution
3) 19 May 2021 Digital Insight: Bridging the Trust Gap – How to Govern AI
4) 17 June 2021 Intelligent Decisions powered by AI: A critical tool for Digital
Government
5) 23 June Artificial Intelligence: How Secure are your ML and AI projects and
How human bias limits
6) 23 September 2021 Applying AI to tackle the Climate Crisis
Publication:
Who is responsible? AI vs corporate governance and SA law
https://www.bizcommunity.com/Article/196/547/208888.html
Latest Research Publications:
Latest Research Publications:
Latest Research Publications:
@inproceedings{521,
  author    = {Ngorima, Aldrin and Helberg, Albert and Davel, Marelie},
  title     = {Simplified Temporal Convolutional-Based Channel Estimation for a {WiFi} Vehicular Communication Channel},
  booktitle = {{IEEE} 3rd Wireless Africa Conference ({WAC})},
  pages     = {1--5},
  publisher = {IEEE},
  address   = {Pretoria, South Africa},
  year      = {2025},
  month     = feb,
  isbn      = {979-8-3315-1758-8},
  doi       = {10.1109/WAC63911.2025.10992609},
}
<p>Vehicular communication systems face significant challenges due to high mobility and rapidly changing environments, which affect the channel over which the signals travel. To address these challenges, neural network (NN)-based channel estimation methods have been suggested. These methods are primarily trained on high signal-to-noise ratio (SNR) with the assumption that training a NN in less noisy conditions can result in good generalisation. This study examines the effectiveness of training NN-based channel estimators on mixed SNR datasets compared to training solely on high SNR datasets, as seen in several related works. Estimators evaluated in this work include an architecture that uses convolutional layers and self-attention mechanisms; a method that employs temporal convolutional networks and data pilot-aided estimation; two methods that combine classical methods with multilayer perceptrons; and the current state-of-the-art model that combines Long-Short-Term Memory networks with data pilot-aided and temporal averaging methods as post processing. Our results indicate that using only high SNR data for training is not always optimal, and the SNR range in the training dataset should be treated as a hyperparameter that can be adjusted for better performance. This is illustrated by the better performance of some models in low SNR conditions when trained on the mixed SNR dataset, as opposed to when trained exclusively on high SNR data.</p>
@inproceedings{516,
  author    = {Ngorima, Aldrin and Helberg, Albert and Davel, Marelie},
  title     = {Neural Network-Based Vehicular Channel Estimation Performance: Effect of Noise in the Training Set},
  abstract  = {Vehicular communication systems face significant challenges due to high mobility and rapidly changing environments, which affect the channel over which the signals travel. To address these challenges, neural network (NN)-based channel estimation methods have been suggested. These methods are primarily trained on high signal-to-noise ratio (SNR) with the assumption that training a NN in less noisy conditions can result in good generalisation. This study examines the effectiveness of training NN-based channel estimators on mixed SNR datasets compared to training solely on high SNR datasets, as seen in several related works. Estimators evaluated in this work include an architecture that uses convolutional layers and self-attention mechanisms; a method that employs temporal convolutional networks and data pilot-aided estimation; two methods that combine classical methods with multilayer perceptrons; and the current state-of-the-art model that combines Long-Short-Term Memory networks with data pilot-aided and temporal averaging methods as post processing. Our results indicate that using only high SNR data for training is not always optimal, and the SNR range in the training dataset should be treated as a hyperparameter that can be adjusted for better performance. This is illustrated by the better performance of some models in low SNR conditions when trained on the mixed SNR dataset, as opposed to when trained exclusively on high SNR data.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2024},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {192--206},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  year      = {2024},
  month     = dec,
  isbn      = {978-3-031-78255-8},
  doi       = {10.1007/978-3-031-78255-8_12},
}
<p>In modern communication systems, having an accurate channel estimator is crucial. However, when there is mobility, it becomes difficult to estimate the channel and the pilot signals, which are used for channel estimation, become insufficient. In this paper, we introduce the use of Temporal Convolutional Networks (TCNs) with data pilot-aided (DPA) channel estimation and temporal averaging (TA) to estimate vehicle-to-vehicle same direction with Wall (VTV-SDWW) channels. The TCN-DPA-TA estimator showed an improvement in Bit Error Rate (BER) performance of up to 1 order of magnitude. Furthermore, the BER performance of the TCN-DPA without TA also improved by up to 0.7 magnitude compared to the best classical estimator.</p>
@inproceedings{515,
  author    = {Ngorima, Aldrin and Helberg, Albert and Davel, Marelie},
  title     = {A Data Pilot-Aided Temporal Convolutional Network for Channel Estimation in {IEEE} 802.11p Vehicle-to-Vehicle Communications},
  abstract  = {In modern communication systems, having an accurate channel estimator is crucial. However, when there is mobility, it becomes difficult to estimate the channel and the pilot signals, which are used for channel estimation, become insufficient. In this paper, we introduce the use of Temporal Convolutional Networks (TCNs) with data pilot-aided (DPA) channel estimation and temporal averaging (TA) to estimate vehicle-to-vehicle same direction with Wall (VTV-SDWW) channels. The TCN-DPA-TA estimator showed an improvement in Bit Error Rate (BER) performance of up to 1 order of magnitude. Furthermore, the BER performance of the TCN-DPA without TA also improved by up to 0.7 magnitude compared to the best classical estimator.},
  booktitle = {Southern Africa Telecommunication Networks and Applications Conference ({SATNAC})},
  pages     = {356--361},
  year      = {2024},
}
Channel estimation is a critical component of vehicular communications systems, especially in high-mobility scenarios. The IEEE 802.11p standard uses preamble-based channel estimation, which is not sufficient in these situations. Recent work has proposed using deep neural networks for channel estimation in IEEE 802.11p. While these methods improved on earlier baselines they still can perform poorly, especially in very high mobility scenarios. This study proposes a novel approach that uses two independent LSTM cells in parallel and averages their outputs to update cell states. The proposed approach improves normalised mean square error, surpassing existing deep learning approaches in very high mobility scenarios.
@inproceedings{504,
  author    = {Ngorima, Aldrin and Helberg, Albert and Davel, Marelie},
  title     = {Sequence Based Deep Neural Networks for Channel Estimation in Vehicular Communication Systems},
  abstract  = {Channel estimation is a critical component of vehicular communications systems, especially in high-mobility scenarios. The IEEE 802.11p standard uses preamble-based channel estimation, which is not sufficient in these situations. Recent work has proposed using deep neural networks for channel estimation in IEEE 802.11p. While these methods improved on earlier baselines they still can perform poorly, especially in very high mobility scenarios. This study proposes a novel approach that uses two independent LSTM cells in parallel and averages their outputs to update cell states. The proposed approach improves normalised mean square error, surpassing existing deep learning approaches in very high mobility scenarios.},
  booktitle = {Artificial Intelligence Research. {SACAIR} 2023},
  series    = {Communications in Computer and Information Science},
  volume    = {1976},
  pages     = {176--186},
  publisher = {Springer},
  address   = {Cham},
  year      = {2023},
  month     = nov,
  isbn      = {978-3-031-49001-9},
  doi       = {10.1007/978-3-031-49002-6_12},
}
Latest Research Publications:
We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61% and an F1 score of 88.68%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual.
@inproceedings{478,
  author       = {Wanyana, Tezira and Nzomo, Mbithe and Price, C. Sue and Moodley, Deshen},
  title        = {Combining Machine Learning and {Bayesian} Networks for {ECG} Interpretation and Explanation},
  abstract     = {We explore how machine learning (ML) and Bayesian networks (BNs) can be combined in a personal health agent (PHA) for the detection and interpretation of electrocardiogram (ECG) characteristics. We propose a PHA that uses ECG data from wearables to monitor heart activity, and interprets and explains the observed readings. We focus on atrial fibrillation (AF), the commonest type of arrhythmia. The absence of a P-wave in an ECG is the hallmark indication of AF. Four ML models are trained to classify an ECG signal based on the presence or absence of the P-wave: multilayer perceptron (MLP), logistic regression, support vector machine, and random forest. The MLP is the best performing model with an accuracy of 89.61\% and an F1 score of 88.68\%. A BN representing AF risk factors is developed based on expert knowledge from the literature and evaluated using Pitchforth and Mengersen’s validation framework. The P-wave presence or absence as determined by the ML model is input into the BN. The PHA is evaluated using sample use cases to illustrate how the BN can explain the occurrence of AF using diagnostic reasoning. This gives the most likely AF risk factors for the individual.},
  booktitle    = {Proceedings of the 8th International Conference on Information and Communication Technologies for Ageing Well and e-Health -- {ICT4AWE}},
  pages        = {81--92},
  publisher    = {SciTePress},
  organization = {INSTICC},
  year         = {2022},
  isbn         = {978-989-758-566-1},
  doi          = {10.5220/0011046100003188},
}
Latest Research Publications:
Latest Research Publications:
We extend the KLM approach to defeasible reasoning to be applicable to a restricted version of first-order logic. We describe defeasibility for this logic using a set of rationality postulates, provide an appropriate semantics for it, and present a representation result that characterises the semantic description of defeasibility in terms of the rationality postulates. Based on this theoretical core, we then propose a version of defeasible entailment that is inspired by Rational Closure as it is defined for defeasible propositional logic and defeasible description logics. We show that this form of defeasible entailment is rational in the sense that it adheres to our rationality postulates. The work in this paper is the first step towards our ultimate goal of introducing KLM-style defeasible reasoning into the family of Datalog+/- ontology languages.
@inproceedings{429,
  author    = {Casini, Giovanni and Meyer, Tommie and Paterson-Jones, Guy},
  title     = {{KLM}-Style Defeasibility for Restricted First-Order Logic},
  abstract  = {We extend the KLM approach to defeasible reasoning to be applicable to a restricted version of first-order logic. We describe defeasibility for this logic using a set of rationality postulates, provide an appropriate semantics for it, and present a representation result that characterises the semantic description of defeasibility in terms of the rationality postulates. Based on this theoretical core, we then propose a version of defeasible entailment that is inspired by Rational Closure as it is defined for defeasible propositional logic and defeasible description logics. We show that this form of defeasible entailment is rational in the sense that it adheres to our rationality postulates. The work in this paper is the first step towards our ultimate goal of introducing KLM-style defeasible reasoning into the family of Datalog+/- ontology languages.},
  booktitle = {19th International Workshop on Non-Monotonic Reasoning},
  pages     = {184--193},
  address   = {Online},
  year      = {2021},
  month     = nov,
  eventdate = {2021-11-03/2021-11-05},
  url       = {https://drive.google.com/open?id=1WSIl3TOrXBhaWhckWN4NLXoD9AVFKp5R},
}
Propositional KLM-style defeasible reasoning involves extending propositional logic with a new logical connective that can express defeasible (or conditional) implications, with semantics given by ordered structures known as ranked interpretations. KLM-style defeasible entailment is referred to as rational whenever the defeasible entailment relation under consideration generates a set of defeasible implications all satisfying a set of rationality postulates known as the KLM postulates. In a recent paper Booth et al. proposed PTL, a logic that is more expressive than the core KLM logic. They proved an impossibility result, showing that defeasible entailment for PTL fails to satisfy a set of rationality postulates similar in spirit to the KLM postulates. Their interpretation of the impossibility result is that defeasible entailment for PTL need not be unique. In this paper we continue the line of research in which the expressivity of the core KLM logic is extended. We present the logic Boolean KLM (BKLM) in which we allow for disjunctions, conjunctions, and negations, but not nesting, of defeasible implications. Our contribution is twofold. Firstly, we show (perhaps surprisingly) that BKLM is more expressive than PTL. Our proof is based on the fact that BKLM can characterise all single ranked interpretations, whereas PTL cannot. Secondly, given that the PTL impossibility result also applies to BKLM, we adapt the different forms of PTL entailment proposed by Booth et al. to apply to BKLM.
@inproceedings{413,
  author    = {Paterson-Jones, Guy and Meyer, Tommie},
  title     = {A {Boolean} Extension of {KLM}-style Conditional Reasoning},
  abstract  = {Propositional KLM-style defeasible reasoning involves extending propositional logic with a new logical connective that can express defeasible (or conditional) implications, with semantics given by ordered structures known as ranked interpretations. KLM-style defeasible entailment is referred to as rational whenever the defeasible entailment relation under consideration generates a set of defeasible implications all satisfying a set of rationality postulates known as the KLM postulates. In a recent paper Booth et al. proposed PTL, a logic that is more expressive than the core KLM logic. They proved an impossibility result, showing that defeasible entailment for PTL fails to satisfy a set of rationality postulates similar in spirit to the KLM postulates. Their interpretation of the impossibility result is that defeasible entailment for PTL need not be unique. In this paper we continue the line of research in which the expressivity of the core KLM logic is extended. We present the logic Boolean KLM (BKLM) in which we allow for disjunctions, conjunctions, and negations, but not nesting, of defeasible implications. Our contribution is twofold. Firstly, we show (perhaps surprisingly) that BKLM is more expressive than PTL. Our proof is based on the fact that BKLM can characterise all single ranked interpretations, whereas PTL cannot. Secondly, given that the PTL impossibility result also applies to BKLM, we adapt the different forms of PTL entailment proposed by Booth et al. to apply to BKLM.},
  booktitle = {First Southern African Conference for {AI} Research ({SACAIR} 2020)},
  pages     = {236--252},
  publisher = {Springer},
  address   = {Muldersdrift, South Africa},
  year      = {2020},
  eventdate = {2021-02-22/2021-02-26},
  isbn      = {978-3-030-66151-9},
  url       = {https://link.springer.com/book/10.1007/978-3-030-66151-9},
  doi       = {10.1007/978-3-030-66151-9_15},
}
Propositional KLM-style defeasible reasoning involves a core propositional logic capable of expressing defeasible (or conditional) implications. The semantics for this logic is based on Kripke-like structures known as ranked interpretations. KLM-style defeasible entailment is referred to as rational whenever the defeasible entailment relation under consideration generates a set of defeasible implications all satisfying a set of rationality postulates known as the KLM postulates. In a recent paper Booth et al. proposed PTL, a logic that is more expressive than the core KLM logic. They proved an impossibility result, showing that defeasible entailment for PTL fails to satisfy a set of rationality postulates similar in spirit to the KLM postulates. Their interpretation of the impossibility result is that defeasible entailment for PTL need not be unique.
In this paper we continue the line of research in which the expressivity of the core KLM logic is extended. We present the logic Boolean KLM (BKLM) in which we allow for disjunctions, conjunctions, and negations, but not nesting, of defeasible implications. Our contribution is twofold. Firstly, we show (perhaps surprisingly) that BKLM is more expressive than PTL. Our proof is based on the fact that BKLM can characterise all single ranked interpretations, whereas PTL cannot. Secondly, given that the PTL impossibility result also applies to BKLM, we adapt the different forms of PTL entailment proposed by Booth et al. to apply to BKLM.
% Workshop paper at the 18th International Workshop on Non-Monotonic Reasoning (cite key 383).
% The venue lives in howpublished because misc entries do not print a journal field;
% the exact event dates are preserved in eventdate (ignored by classic BibTeX styles).
@misc{383,
  author       = {Paterson-Jones, Guy and Casini, Giovanni and Meyer, Tommie},
  title        = {{BKLM} -- An expressive logic for defeasible reasoning},
  abstract     = {Propositional KLM-style defeasible reasoning involves a core propositional logic capable of expressing defeasible (or conditional) implications. The semantics for this logic is based on Kripke-like structures known as ranked interpretations. KLM-style defeasible entailment is referred to as rational whenever the defeasible entailment relation under consideration generates a set of defeasible implications all satisfying a set of rationality postulates known as the KLM postulates. In a recent paper Booth et al. proposed PTL, a logic that is more expressive than the core KLM logic. They proved an impossibility result, showing that defeasible entailment for PTL fails to satisfy a set of rationality postulates similar in spirit to the KLM postulates. Their interpretation of the impossibility result is that defeasible entailment for PTL need not be unique.
In this paper we continue the line of research in which the expressivity of the core KLM logic is extended. We present the logic Boolean KLM (BKLM) in which we allow for disjunctions, conjunctions, and negations, but not nesting, of defeasible implications. Our contribution is twofold. Firstly, we show (perhaps surprisingly) that BKLM is more expressive than PTL. Our proof is based on the fact that BKLM can characterise all single ranked interpretations, whereas PTL cannot. Secondly, given that the PTL impossibility result also applies to BKLM, we adapt the different forms of PTL entailment proposed by Booth et al. to apply to BKLM.},
  howpublished = {18th International Workshop on Non-Monotonic Reasoning},
  year         = {2020},
  month        = sep,
  eventdate    = {2020-09-12/2020-09-24},
}
Latest Research Publications:
Latest Research Publications:
Building computational models of agents in dynamic, partially observable and stochastic environments is challenging. We propose a cognitive computational model of sugarcane growers’ daily decision-making to examine sugarcane supply chain complexities. Growers make decisions based on uncertain weather forecasts; cane dryness; unforeseen emergencies; and the mill’s unexpected call for delivery of a different amount of cane. The Belief-Desire-Intention (BDI) architecture has been used to model cognitive agents in many domains, including agriculture. However, typical implementations of this architecture have represented beliefs symbolically, so uncertain beliefs are usually not catered for. Here we show that a BDI architecture, enhanced with a dynamic decision network (DDN), suitably models sugarcane grower agents’ repeated daily decisions. Using two complex scenarios, we demonstrate that the agent selects the appropriate intention, and suggests how the grower should act adaptively and proactively to achieve his goals. In addition, we provide a mapping for using a DDN in a BDI architecture. This architecture can be used for modelling sugarcane grower agents in an agent-based simulation. The mapping of the DDN’s use in the BDI architecture enables this work to be applied to other domains for modelling agents’ repeated decisions in partially observable, stochastic and dynamic environments.
% Journal article, South African Computer Journal 34(1) (cite key 488).
% doi holds the bare identifier; the resolver prefix is added by the bibliography style.
@article{488,
  author   = {Price, C. Sue and Moodley, Deshen and Pillay, Anban and Rens, Gavin},
  title    = {An adaptive probabilistic agent architecture for modelling sugarcane growers’ decision-making},
  abstract = {Building computational models of agents in dynamic, partially observable and stochastic environments is challenging. We propose a cognitive computational model of sugarcane growers’ daily decision-making to examine sugarcane supply chain complexities. Growers make decisions based on uncertain weather forecasts; cane dryness; unforeseen emergencies; and the mill’s unexpected call for delivery of a different amount of cane. The Belief-Desire-Intention (BDI) architecture has been used to model cognitive agents in many domains, including agriculture. However, typical implementations of this architecture have represented beliefs symbolically, so uncertain beliefs are usually not catered for. Here we show that a BDI architecture, enhanced with a dynamic decision network (DDN), suitably models sugarcane grower agents’ repeated daily decisions. Using two complex scenarios, we demonstrate that the agent selects the appropriate intention, and suggests how the grower should act adaptively and proactively to achieve his goals. In addition, we provide a mapping for using a DDN in a BDI architecture. This architecture can be used for modelling sugarcane grower agents in an agent-based simulation. The mapping of the DDN’s use in the BDI architecture enables this work to be applied to other domains for modelling agents’ repeated decisions in partially observable, stochastic and dynamic environments.},
  journal  = {South African Computer Journal},
  year     = {2022},
  volume   = {34},
  number   = {1},
  pages    = {152--191},
  url      = {https://sacj.cs.uct.ac.za/index.php/sacj/article/view/857},
  doi      = {10.18489/sacj.v34i1.857},
}
Depth cameras provide a natural and intuitive user interaction mechanism in virtual reality environments by using hand gestures as the primary user input. However, building robust VR systems that use depth cameras is challenging. Gesture recognition accuracy is affected by occlusion, variation in hand orientation and misclassification of similar hand gestures. This research explores the limits of the Leap Motion depth camera for static hand pose recognition in virtual reality applications. We propose a system for analysing static hand poses and for systematically identifying a pose set that can achieve a near-perfect recognition accuracy. The system consists of a hand pose taxonomy, a pose notation, a machine learning classifier and an algorithm to identify a reliable pose set that can achieve near perfect accuracy levels. We used this system to construct a benchmark hand pose data set containing 2550 static hand pose instances, and show how the algorithm can be used to systematically derive a set of poses that can produce an accuracy of 99% using a Support Vector Machine classifier.
% Conference paper in the SAICSIT '20 proceedings (cite key 379).
% The original header had no entry type, which makes the entry unparseable.
% The exact conference date is preserved in eventdate (ignored by classic BibTeX styles).
@inproceedings{379,
  author    = {Clark, Andrew and Pillay, Anban and Moodley, Deshen},
  title     = {A system for pose analysis and selection in virtual reality environments},
  abstract  = {Depth cameras provide a natural and intuitive user interaction mechanism in virtual reality environments by using hand gestures as the primary user input. However, building robust VR systems that use depth cameras is challenging. Gesture recognition accuracy is affected by occlusion, variation in hand orientation and misclassification of similar hand gestures. This research explores the limits of the Leap Motion depth camera for static hand pose recognition in virtual reality applications. We propose a system for analysing static hand poses and for systematically identifying a pose set that can achieve a near-perfect recognition accuracy. The system consists of a hand pose taxonomy, a pose notation, a machine learning classifier and an algorithm to identify a reliable pose set that can achieve near perfect accuracy levels. We used this system to construct a benchmark hand pose data set containing 2550 static hand pose instances, and show how the algorithm can be used to systematically derive a set of poses that can produce an accuracy of 99\% using a Support Vector Machine classifier.},
  booktitle = {SAICSIT '20: Conference of the South African Institute of Computer Scientists and Information Technologists 2020},
  year      = {2020},
  month     = sep,
  eventdate = {2020-09-14},
  pages     = {210--216},
  publisher = {ACM Digital Library},
  address   = {Virtual},
  isbn      = {978-1-4503-8847-4},
  url       = {https://dl.acm.org/doi/proceedings/10.1145/3410886},
}
A dynamic Bayesian decision network was developed to model the preharvest burning decision-making processes of sugarcane growers in a KwaZulu-Natal sugarcane supply chain and extends previous work by Price et al. (2018). This model was created using an iterative development approach. This paper recounts the development and validation process of the third version of the model. The model was validated using Pitchforth and Mengersen (2013)’s framework for validating expert elicited Bayesian networks. During this process, growers and cane supply members assessed the model in a focus group by executing the model, and reviewing the results of a prerun scenario. The participants were generally positive about how the model represented their decision-making processes. However, they identified some issues that could be addressed in the next iteration. Dynamic Bayesian decision networks offer a promising approach to modelling adaptive decisions in uncertain conditions. This model can be used to simulate the cognitive mechanism for a grower agent in a simulation of a sugarcane supply chain.
% Conference paper in the FAIR2019 proceedings (cite key 244).
% The original header had no entry type, which makes the entry unparseable.
% The exact conference dates are preserved in eventdate (ignored by classic BibTeX styles).
@inproceedings{244,
  author    = {Price, C. Sue and Moodley, Deshen and Pillay, Anban},
  title     = {Modelling uncertain adaptive decisions: Application to {KwaZulu-Natal} sugarcane growers},
  abstract  = {A dynamic Bayesian decision network was developed to model the preharvest burning decision-making processes of sugarcane growers in a KwaZulu-Natal sugarcane supply chain and extends previous work by Price et al. (2018). This model was created using an iterative development approach. This paper recounts the development and validation process of the third version of the model. The model was validated using Pitchforth and Mengersen (2013)’s framework for validating expert elicited Bayesian networks. During this process, growers and cane supply members assessed the model in a focus group by executing the model, and reviewing the results of a prerun scenario. The participants were generally positive about how the model represented their decision-making processes. However, they identified some issues that could be addressed in the next iteration. Dynamic Bayesian decision networks offer a promising approach to modelling adaptive decisions in uncertain conditions. This model can be used to simulate the cognitive mechanism for a grower agent in a simulation of a sugarcane supply chain.},
  booktitle = {Forum for Artificial Intelligence Research (FAIR2019)},
  year      = {2019},
  month     = dec,
  eventdate = {2019-12-04/2019-12-06},
  pages     = {145--160},
  publisher = {CEUR},
  address   = {Cape Town},
  url       = {http://ceur-ws.org/Vol-2540/FAIR2019_paper_53.pdf},
}
Aim/Purpose
The aim of this project was to explore models for stimulating health informatics innovation and capacity development in South Africa.
Background
There is generally a critical lack of health informatics innovation and capacity in South Africa and sub-Saharan Africa. This is despite the wide anticipation that digital health systems will play a fundamental role in strengthening health systems and improving service delivery.
Methodology
We established a program over four years to train Masters and Doctoral students and conducted research projects across a wide range of biomedical and health informatics technologies at a leading South African university. We also developed a Health Architecture Laboratory Innovation and Development Ecosystem (HeAL-IDE) designed to be a long-lasting and potentially reproducible output of the project.
Contribution
We were able to demonstrate a successful model for building innovation and capacity in a sustainable way. Key outputs included: (i) a successful partnership model; (ii) a sustainable HeAL-IDE; (iii) research papers; (iv) a world-class software product and several demonstrators; and (v) highly trained staff.
Findings
Our main findings are that: (i) it is possible to create a local ecosystem for innovation and capacity building that creates value for the partners (a university and a private non-profit company); (ii) the ecosystem is able to create valuable outputs that would be much less likely to have been developed singly by each partner, and; (iii) the ecosystem could serve as a powerful model for adoption in other settings.
Recommendations for Practitioners
Non-profit companies and non-governmental organizations implementing health information systems in South Africa and other low resource settings have an opportunity to partner with local universities for purposes of internal capacity development and assisting with the research, reflection and innovation aspects of their projects and programmes.
Recommendation for Researchers
Applied health informatics researchers working in low resource settings could productively partner with local implementing organizations in order to gain a better understanding of the challenges and requirements at field sites and to accelerate the testing and deployment of health information technology solutions.
Impact on Society
This research demonstrates a model that can deliver valuable software products for public health.
Future Research
It would be useful to implement the model in other settings and research whether the model is more generally useful.
% Paper in the Digital Re-imagination Colloquium 2018 proceedings (cite key 252).
% The original header had no entry type, which makes the entry unparseable.
% The exact colloquium dates are preserved in eventdate (ignored by classic BibTeX styles).
@inproceedings{252,
  author    = {Moodley, Deshen and Pillay, Anban and Seebregts, Chris},
  title     = {Establishing a Health Informatics Research Laboratory in {South Africa}},
  abstract  = {Aim/Purpose
The aim of this project was to explore models for stimulating health informatics innovation and capacity development in South Africa.
Background
There is generally a critical lack of health informatics innovation and capacity in South Africa and sub-Saharan Africa. This is despite the wide anticipation that digital health systems will play a fundamental role in strengthening health systems and improving service delivery.
Methodology
We established a program over four years to train Masters and Doctoral students and conducted research projects across a wide range of biomedical and health informatics technologies at a leading South African university. We also developed a Health Architecture Laboratory Innovation and Development Ecosystem (HeAL-IDE) designed to be a long-lasting and potentially reproducible output of the project.
Contribution
We were able to demonstrate a successful model for building innovation and capacity in a sustainable way. Key outputs included: (i) a successful partnership model; (ii) a sustainable HeAL-IDE; (iii) research papers; (iv) a world-class software product and several demonstrators; and (v) highly trained staff.
Findings
Our main findings are that: (i) it is possible to create a local ecosystem for innovation and capacity building that creates value for the partners (a university and a private non-profit company); (ii) the ecosystem is able to create valuable outputs that would be much less likely to have been developed singly by each partner, and; (iii) the ecosystem could serve as a powerful model for adoption in other settings.
Recommendations for Practitioners
Non-profit companies and non-governmental organizations implementing health information systems in South Africa and other low resource settings have an opportunity to partner with local universities for purposes of internal capacity development and assisting with the research, reflection and innovation aspects of their projects and programmes.
Recommendation for Researchers
Applied health informatics researchers working in low resource settings could productively partner with local implementing organizations in order to gain a better understanding of the challenges and requirements at field sites and to accelerate the testing and deployment of health information technology solutions.
Impact on Society
This research demonstrates a model that can deliver valuable software products for public health.
Future Research
It would be useful to implement the model in other settings and research whether the model is more generally useful.},
  booktitle = {Digital Re-imagination Colloquium 2018},
  year      = {2018},
  month     = mar,
  eventdate = {2018-03-13/2018-03-15},
  pages     = {16--24},
  publisher = {NEMISA},
  isbn      = {978-0-6399275-0-3},
  url       = {http://uir.unisa.ac.za/bitstream/handle/10500/25615/Digital%20Skills%20Proceedings%202018.pdf?sequence=1&isAllowed=y},
}
This paper proposes an improved Generalized Regression Neural Network (KGRNN) for the diagnosis of type II diabetes. Diabetes, a widespread chronic disease, is a metabolic disorder that develops when the body does not make enough insulin or is unable to use insulin effectively. Type II diabetes is the most common type and accounts for an estimated 90% of cases. The novel KGRNN technique reported in this study uses an enhanced K-Means clustering technique (CVE-K-Means) to produce cluster centers (centroids) that are used to train the network. The technique was applied to the Pima Indian diabetes dataset, a widely used benchmark dataset for Diabetes diagnosis. The technique outperforms the best known GRNN techniques for Type II diabetes diagnosis in terms of classification accuracy and computational time and obtained a classification accuracy of 86% with 83% sensitivity and 87% specificity. The Area Under the Receiver Operating Characteristic Curve (ROC) of 87% was obtained.
% Conference paper published in the ICCSA 2018 proceedings, LNCS volume 10963 (cite key 195).
% The LNCS number is a series volume, not an edition, so it is stored in series/volume.
@inproceedings{195,
  author    = {Ndaba, Moeketsi and Pillay, Anban and Ezugwu, Absalom},
  title     = {An Improved Generalized Regression Neural Network for Type {II} Diabetes Classification},
  abstract  = {This paper proposes an improved Generalized Regression Neural Network (KGRNN) for the diagnosis of type II diabetes. Diabetes, a widespread chronic disease, is a metabolic disorder that develops when the body does not make enough insulin or is unable to use insulin effectively. Type II diabetes is the most common type and accounts for an estimated 90\% of cases. The novel KGRNN technique reported in this study uses an enhanced K-Means clustering technique (CVE-K-Means) to produce cluster centers (centroids) that are used to train the network. The technique was applied to the Pima Indian diabetes dataset, a widely used benchmark dataset for Diabetes diagnosis. The technique outperforms the best known GRNN techniques for Type II diabetes diagnosis in terms of classification accuracy and computational time and obtained a classification accuracy of 86\% with 83\% sensitivity and 87\% specificity. The Area Under the Receiver Operating Characteristic Curve (ROC) of 87\% was obtained.},
  booktitle = {{ICCSA} 2018},
  series    = {Lecture Notes in Computer Science},
  volume    = {10963},
  year      = {2018},
  pages     = {659--671},
  publisher = {Springer International Publishing AG},
  isbn      = {3319951718},
}


