## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message=FALSE, warning=FALSE-------------------------------------- library(harmony) library(Seurat) library(dplyr) library(cowplot) ## ----eval=FALSE--------------------------------------------------------------- # install.packages('harmony') ## ----------------------------------------------------------------------------- ## Source required data data("pbmc_stim") pbmc <- CreateSeuratObject(counts = cbind(pbmc.stim, pbmc.ctrl), project = "PBMC", min.cells = 5) ## Separate conditions pbmc@meta.data$stim <- c(rep("STIM", ncol(pbmc.stim)), rep("CTRL", ncol(pbmc.ctrl))) ## ----eval = FALSE, class.source='fold-hide'----------------------------------- # library(Matrix) # ## Download and extract files from GEO # ##setwd("/path/to/downloaded/files") # genes = read.table("GSE96583_batch2.genes.tsv.gz", header = FALSE, sep = "\t") # # pbmc.ctrl.full = as.readMM("GSM2560248_2.1.mtx.gz") # colnames(pbmc.ctrl.full) = paste0(read.table("GSM2560248_barcodes.tsv.gz", header = FALSE, sep = "\t")[,1], "-1") # rownames(pbmc.ctrl.full) = genes$V1 # # pbmc.stim.full = readMM("GSM2560249_2.2.mtx.gz") # colnames(pbmc.stim.full) = paste0(read.table("GSM2560249_barcodes.tsv.gz", header = FALSE, sep = "\t")[,1], "-2") # rownames(pbmc.stim.full) = genes$V1 # # library(Seurat) # # pbmc <- CreateSeuratObject(counts = cbind(pbmc.stim.full, pbmc.ctrl.full), project = "PBMC", min.cells = 5) # pbmc@meta.data$stim <- c(rep("STIM", ncol(pbmc.stim.full)), rep("CTRL", ncol(pbmc.ctrl.full))) # # # # # # Running Harmony # # Harmony works on an existing matrix with cell embeddings and outputs its transformed version with the datasets aligned according to some user-defined experimental conditions. By default, harmony will look up the `pca` cell embeddings and use these to run harmony. Therefore, it assumes that the Seurat object has these embeddings already precomputed. # # ## Calculate PCA cell embeddings # # Here, using `Seurat::NormalizeData()`, we will be generating a union of highly variable genes using each condition (the control and stimulated cells). These features are going to be subsequently used to generate the 20 PCs with `Seurat::RunPCA()`. # ## ----------------------------------------------------------------------------- pbmc <- pbmc %>% NormalizeData(verbose = FALSE) VariableFeatures(pbmc) <- split(row.names(pbmc@meta.data), pbmc@meta.data$stim) %>% lapply(function(cells_use) { pbmc[,cells_use] %>% FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>% VariableFeatures() }) %>% unlist %>% unique pbmc <- pbmc %>% ScaleData(verbose = FALSE) %>% RunPCA(features = VariableFeatures(pbmc), npcs = 20, verbose = FALSE) ## ---- eval=FALSE-------------------------------------------------------------- # ## run harmony with default parameters # pbmc <- pbmc %>% RunHarmony("stim") # ## is equivalent to: # pbmc <- RunHarmony(pbmc, "stim") ## ---- fig.width = 4, fig.height = 3, fig.align = "center", out.width="50%", fig.cap="By setting `plot_converge=TRUE`, harmony will generate a plot with its objective showing the flow of the integration. Each point represents the cost measured after a clustering round. Different colors represent different Harmony iterations which is controlled by `max_iter` (assuming that early_stop=FALSE). Here `max_iter=10` and up to 10 correction steps are expected. However, `early_stop=TRUE` so harmony will stop after the cost plateaus."---- pbmc <- pbmc %>% RunHarmony("stim", plot_convergence = TRUE, nclust = 50, max_iter = 10, early_stop = T) ## ----------------------------------------------------------------------------- harmony.embeddings <- Embeddings(pbmc, reduction = "harmony") ## ---- fig.width=7, fig.height=3, out.width="100%", fig.align="center", fig.cap="Evaluate harmonization of stim parameter in the harmony generated cell embeddings"---- p1 <- DimPlot(object = pbmc, reduction = "harmony", pt.size = .1, group.by = "stim") p2 <- VlnPlot(object = pbmc, features = "harmony_1", group.by = "stim", pt.size = .1) plot_grid(p1,p2) ## ---- fig.width = 6, fig.height=3, out.width="100%"--------------------------- DimHeatmap(object = pbmc, reduction = "harmony", cells = 500, dims = 1:3) ## ----------------------------------------------------------------------------- pbmc <- pbmc %>% FindNeighbors(reduction = "harmony") %>% FindClusters(resolution = 0.5) ## ---- fig.width=5, fig.height=2.5, fig.align="center", fig.cap="t-SNE Visualization of harmony embeddings"---- pbmc <- pbmc %>% RunTSNE(reduction = "harmony") p1 <- DimPlot(pbmc, reduction = "tsne", group.by = "stim", pt.size = .1) p2 <- DimPlot(pbmc, reduction = "tsne", label = TRUE, pt.size = .1) plot_grid(p1, p2) ## ---- fig.width = 7, fig.height = 7, out.width="100%", fig.cap="Expression of gene panel heatmap in the harmonized PBMC dataset"---- FeaturePlot(object = pbmc, features= c("CD3D", "SELL", "CREM", "CD8A", "GNLY", "CD79A", "FCGR3A", "CCL2", "PPBP"), min.cutoff = "q9", cols = c("lightgrey", "blue"), pt.size = 0.5) ## ---- fig.width=5, fig.height=2.5, fig.align="center", fig.cap="UMAP Visualization of harmony embeddings"---- pbmc <- pbmc %>% RunUMAP(reduction = "harmony", dims = 1:20) p1 <- DimPlot(pbmc, reduction = "umap", group.by = "stim", pt.size = .1) p2 <- DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = .1) plot_grid(p1, p2) ## ----------------------------------------------------------------------------- sessionInfo()