## ----include = FALSE---------------------------------------------------------- use_saved_results <- TRUE knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = TRUE, eval = !use_saved_results, message = FALSE, warning = FALSE ) if (use_saved_results) { results <- readRDS("vignette_pbhp.rds") eval_frame <- results$eval_frame } ## ----eval=TRUE---------------------------------------------------------------- library(dplyr); library(tidyr); library(purrr) # Data wrangling library(ggplot2); library(stringr) # Plotting library(tidyfit) # Auto-ML modeling ## ----eval=TRUE---------------------------------------------------------------- data <- MASS::Boston # For reproducibility set.seed(123) ix_tst <- sample(1:nrow(data), round(nrow(data)*0.5)) data_trn <- data[-ix_tst,] data_tst <- data[ix_tst,] as_tibble(data) ## ----eval=TRUE---------------------------------------------------------------- model_frame <- data_trn %>% regress(medv ~ ., OLS = m("lm")) ## ----eval=TRUE---------------------------------------------------------------- assess <- function(model_frame, data_trn, data_tst) { oos <- model_frame %>% predict(data_tst) %>% group_by(model, .add = TRUE) %>% yardstick::rsq_trad(truth, prediction) %>% mutate(type = "Out-of-sample") %>% arrange(desc(.estimate)) is <- model_frame %>% predict(data_trn) %>% group_by(model, .add = TRUE) %>% yardstick::rsq_trad(truth, prediction) %>% mutate(type = "In-sample") return(bind_rows(oos, is)) } plotter <- function(df) { df %>% mutate(lab = round(.estimate, 2)) %>% mutate(model = str_wrap(model, 12)) %>% mutate(model = factor(model, levels = unique(.$model))) %>% ggplot(aes(model, .estimate)) + geom_point(aes(color = type), size = 2.5, shape = 4) + geom_label(aes(label = lab, color = type), size = 2, nudge_x = 0.35) + theme_bw() + scale_color_manual(values = c("firebrick", "darkblue")) + theme(legend.title = element_blank(), axis.title.x = element_blank()) + coord_cartesian(ylim = c(0.65, 0.95)) } ## ----fig.width=3, fig.height=3, fig.align="center", eval=TRUE----------------- model_frame %>% assess(data_trn, data_tst) %>% plotter ## ----fig.width=3, fig.height=3, fig.align="center", eval=TRUE----------------- model_frame <- data_trn %>% regress(medv ~ .*., OLS = m("lm")) model_frame %>% assess(data_trn, data_tst) %>% plotter ## ----echo = F, eval=TRUE------------------------------------------------------ library(kableExtra) tbl <- data.frame( `Best subset` = c("Exhaustive search (`leaps`)", "Forward selection (`leaps`)", "Backward selection (`leaps`)", "Sequential Replacement (`leaps`)", "Genetic Algorithm ('gaselect')"), `L1/L2 penalized` = c("Lasso (`glmnet`)", "Ridge (`glmnet`)", "ElasticNet (`glmnet`)", "Adaptive Lasso (`glmnet`)", ""), `Latent factors` = c("Principal components regression (`pls`)", "Partial least squares (`pls`)", "", "", ""), `Machine Learning` = c("Gradient Boosting (`mboost`)", "Hierarchical feature regression (`hfr`)", "Support vector regression (`e1071`)", "", ""), `Bayesian` = c("Bayesian regression (`arm`)", "Bayesian model averaging (`BMS`)", "Bayesian Lasso (`monomvn`)", "Bayesian Ridge (`monomvn`)", "Spike and Slab Regression ('BoomSpikeSlab')"), check.names = F ) kbl(tbl, align = "ccccc") %>% kable_styling("striped") ## ----------------------------------------------------------------------------- # model_frame_all <- data_trn %>% # regress(medv ~ .*., # OLS = m("lm"), # BAYES = m("bayes"), # BMA = m("bma", iter = 10000), # SEQREP = m("subset", method = "seqrep", IC = "AIC"), # `FORWARD SELECTION` = m("subset", method = "forward", IC = "AIC"), # `BACKWARD SELECTION` = m("subset", method = "backward", IC = "AIC"), # LASSO = m("lasso"), # BLASSO = m("blasso"), # SPIKESLAB = m("spikeslab", niter = 10000), # RIDGE = m("ridge"), # BRIDGE = m("bridge"), # ELASTICNET = m("enet"), # ADALASSO = m("adalasso", lambda_ridge = c(0.001, 0.01, 0.1)), # PCR = m("pcr"), # PLSR = m("plsr"), # HFR = m("hfr"), # `GRADIENT BOOSTING` = m("boost"), # SVR = m("svm"), # GENETIC = m("genetic", populationSize = 1000, numGenerations = 50, statistic = "AIC", maxVariables = 20), # .cv = "vfold_cv", .cv_args = list(v = 10)) ## ----------------------------------------------------------------------------- # eval_frame <- model_frame_all %>% # assess(data_trn, data_tst) ## ----fig.width=7.25, fig.height=4.25, fig.align="center", eval=TRUE----------- eval_frame %>% plotter + theme(legend.position = "top", axis.text.x = element_text(angle = 90))