@conference{404,
  author    = {Dylan Lamprecht and Etienne Barnard},
  title     = {Using a meta-model to compensate for training-evaluation mismatches},
  booktitle = {Southern African Conference for Artificial Intelligence Research},
  year      = {2020},
  pages     = {321-334},
  month     = {22/02/2021 - 26/02/2021},
  address   = {South Africa},
  isbn      = {978-0-620-89373-2},
  url       = {https://sacair.org.za/proceedings/},
  keywords  = {Generalization, Meta-model, Mismatched distributions, Robustness, Machine learning, Tree-based models},
  abstract  = {One of the fundamental assumptions of machine learning is that learnt models are applied to data that is identically distributed to the training data. This assumption is often not realistic: for example, data collected from a single source at different times may not be identically distributed, due to sampling bias or changes in the environment. We propose a new architecture, called a meta-model, which predicts the performance of unseen models. This approach is applicable when several ‘proxy’ datasets are available to train a model to be deployed on a ‘target’ test set; the architecture is used to identify which regression algorithms should be used as well as which datasets are most useful for training on a given target dataset. Finally, we demonstrate the strengths and weaknesses of the proposed meta-model on artificial regression datasets generated with a variation of Friedman's third method, and discuss real-world applications of our approach.},
}