# Code extracted from a vignette demonstrating Manhattan plots with metasnf.
# Only the setup chunk contains live code. Every chunk labelled `eval = FALSE`
# is kept commented out, so sourcing this file runs no clustering or plotting.
# NOTE(review): `subc_v`, `income`, `pubertal`, `anxiety` and `depress` are not
# defined in this file -- presumably example data shipped with metasnf; confirm
# before uncommenting and running the chunks below.

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# library(metasnf)
#
# # Start by making a data list containing all our dataframes to more easily
# # identify subjects without missing data
# full_data_list <- generate_data_list(
#   list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
#   list(income, "household_income", "demographics", "continuous"),
#   list(pubertal, "pubertal_status", "demographics", "continuous"),
#   list(anxiety, "anxiety", "behaviour", "ordinal"),
#   list(depress, "depressed", "behaviour", "ordinal"),
#   uid = "unique_id"
# )
#
# # Partition into a data and target list (optional)
# data_list <- full_data_list[1:3]
# target_list <- full_data_list[4:5]
#
# # Build space of settings to cluster over
# set.seed(42)
# settings_matrix <- generate_settings_matrix(
#   data_list,
#   nrow = 20,
#   min_k = 20,
#   max_k = 50
# )
#
# # Clustering
# solutions_matrix <- batch_snf(data_list, settings_matrix)
#
# # Calculate p-values between cluster solutions and features
# extended_solutions_matrix <- extend_solutions(
#   solutions_matrix,
#   data_list = data_list,
#   target_list = target_list,
#   min_pval = 1e-10 # p-values below 1e-10 will be thresholded to 1e-10
# )

## ----eval = FALSE-------------------------------------------------------------
# # Manhattan plot of feature associations for the first five solutions
# esm_manhattan <- esm_manhattan_plot(
#   extended_solutions_matrix[1:5, ],
#   neg_log_pval_thresh = 5,
#   threshold = 0.05,
#   point_size = 3,
#   jitter_width = 0.1,
#   jitter_height = 0.1,
#   plot_title = "Feature-Solution Associations",
#   text_size = 14,
#   bonferroni_line = TRUE
# )
#
# ggplot2::ggsave(
#   "esm_manhattan.png",
#   esm_manhattan,
#   height = 5,
#   width = 8,
#   dpi = 100
# )

## ----eval = FALSE-------------------------------------------------------------
# # Calculate pairwise similarities between cluster solutions
# solutions_matrix_aris <- calc_aris(solutions_matrix)
#
# # Extract hierarchical clustering order of the cluster solutions
# meta_cluster_order <- get_matrix_order(solutions_matrix_aris)
#
# # Create a base heatmap for visual meta clustering
# ari_hm <- adjusted_rand_index_heatmap(
#   solutions_matrix_aris,
#   order = meta_cluster_order
# )
#
# # Identify meta cluster boundaries
# # This can also be done by trial and error if you do not wish to use the
# # shiny app.
# shiny_annotator(ari_hm)
#
# # Result of meta cluster examination
# split_vec <- c(2, 5, 12, 17)
#
# # Rebuild the heatmap, this time passing the chosen split vector
# ari_hm <- adjusted_rand_index_heatmap(
#   solutions_matrix_aris,
#   order = meta_cluster_order,
#   split_vector = split_vec
# )
#
# # Extracting representative solutions from each defined meta cluster
# rep_solutions <- get_representative_solutions(
#   solutions_matrix_aris,
#   split_vector = split_vec,
#   order = meta_cluster_order,
#   extended_solutions_matrix
# )
#
# mc_manhattan <- mc_manhattan_plot(
#   rep_solutions,
#   data_list = data_list,
#   target_list = target_list,
#   point_size = 3,
#   text_size = 12,
#   plot_title = "Feature-Meta Cluster Associations",
#   threshold = 0.05,
#   neg_log_pval_thresh = 5
# )
#
# ggplot2::ggsave(
#   "mc_manhattan_clean.png",
#   mc_manhattan,
#   height = 10,
#   width = 10,
#   dpi = 100
# )

## ----eval = FALSE-------------------------------------------------------------
# # Manhattan plot keyed on a single variable (household income)
# var_manhattan <- var_manhattan_plot(
#   data_list,
#   key_var = "household_income",
#   plot_title = "Correlation of Features with Household Income",
#   text_size = 16,
#   neg_log_pval_thresh = 3,
#   threshold = 0.05
# )
#
# ggplot2::ggsave(
#   "var_manhattan.png",
#   var_manhattan,
#   height = 7,
#   width = 12,
#   dpi = 100
# )