Cell Rangerソフトウェアやヒト・マウスのpre-buildリファレンスデータを
Downloadsサイトから入手する.
次に
Installガイドに従い、ソフトウェアをインストールする.
必要に応じて
cellanger mkrefコマンドにより、カスタムリファレンスを作成する.
cellranger countコマンドにより解析(Mappingとカウント)を実行する.
解析実施例
#@ COUNT / include_intron (default) homd=/home/tom/.cellranger mus=$homd/refdata-gex-mm10-2020-A hs=$homd/refdata-gex-GRCh38-2020-A dir_in=AGED dir_out=MAP prefix=Aged cpu=10 memory=128 cellranger count \ --id=$dir_out \ --fastqs=$dir_in \ --sample=$prefix \ --transcriptome=$mus \ --localcores=$cpu \ --localmem=$memory
カスタムリファレンスの作成例
#@ custom reference # Get GTF/GENOME from ENSEMBL and uncompress # Filter GTF cellranger mkgtf \ a.gtf \ out.gtf \ --attribute=gene_biotype:protein_coding # Make Ref cellranger mkref \ --genome=outdir \ --fasta=g.fa \ --genes=out.gtf
library(Seurat) library(ggplot2)
so <- CreateSeuratObject(Read10X("CELLRANGER_DIR"), min.cells = 3)
so[["percent.mt"]] <- PercentageFeatureSet(so, pattern="^mt-") VlnPlot(so, features=c("nFeature_RNA", "nCount_RNA", "percent.mt"))
so <- subset(so, subset=nCount_RNA > 1000 & percent.mt > 20)
so <- NormalizeData(so) so <- FindVariableFeatures(so, selection.method="vst", nfeatures=2000) so <- ScaleData(so, features = rownames(so))
so <- RunPCA(so, features = VariableFeatures(object = so)) ElbowPlot(so)
so <- FindNeighbors(so, dims = 1:20) so <- FindClusters(so, resolution = 0.5) so <- RunUMAP(so, dims = 1:20)
DimPlot(so, reduction = "umap", label = TRUE, pt.size = .8, label.size = 8)
VlnPlot(so, features=c("INS", "GCG")) FeaturePlot(so, features=c("INS", "GCG"))
FindMarkers(so, ident.1 = "0", ident.2 = c("1", "2"))
saveRDS(so, file = "so.rds")
conda install -c conda-forge r-remotes remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
library(Seurat) require(DoubletFinder)
以下のデータはprefiltering, scaling, PCA, UMAP等を解析済みもの
Aggregateしたデータは使用しない (ex. Ctrlデータ + 介入データ)
so <- readRDS("tmp_a.rds")
so_tmp_1 <- paramSweep_v3(so, PCs = 1:16) so_tmp_2 <- summarizeSweep(so_tmp_1, GT = FALSE) bcmvn <- find.pK(so_tmp_2) barplot(bcmvn$BCmetric, names.arg=bcmvn$pK,las = 2) # Bcmvn ~ Mean-variance normalized bimodality coefficient # 下図のMax値である0.02を次式のpKに設定
# Set Doublets numbers nExp <- round(ncol(so) * 0.10) # doubletsを10%と仮定 # 10x社の細胞インプット数と回収数の対応表を参照して決定 so <- doubletFinder_v3(so, pN = 0.25, pK = 0.02, nExp = nExp, PCs = 1:16) # PCs ~ The number of statistically-significant principal components # # pN ~ This defines the number of generated artificial doublets, # expressed as a proportion of the merged real-artificial data. # Default is set to 25%, based on observation that # DoubletFinder performance is largely pN-invariant # # pK ~ This defines the PC neighborhood size used to compute pANN, # expressed as a proportion of the merged real-artificial data. # No default is set, as pK should be adjusted for each scRNA-seq dataset. # # nExp ~ This defines the pANN threshold used to make final doublet/singlet predictions.
DF.name = colnames(so@meta.data)[grepl("DF.classification", colnames(so@meta.data))] DimPlot(so, group.by = DF.name, pt.size = 1.2) VlnPlot(so,features = "nFeature_RNA", group.by = DF.name, pt.size = 1.2)
so_filtered = so[,so@meta.data[,DF.name] == "Singlet"]
In the folling example, Test data is Seurat object and reference data is SingleCellExperiment object. Cell annotation is stocked at LABEL column of reference data.
library(SingleR) library(SingleCellExperiment) library(Seurat)
so <- readRDS("SO.rds") sceT <- as.SingleCellExperiment(so) sceR <- readRDS("SCE.rds")
pred <- SingleR( sceT, sceR, sceR$LABEL, assay.type.test = 1 )
so$pred <- pred$labels
In the folling example, Test data is Seurat object and reference data is SingleCellExperiment object. Cell annotation is stocked at LABEL column of reference data.
library(SingleR) library(SingleCellExperiment) library(Seurat) library(scran)
so <- readRDS("SO.rds") sceT <- as.SingleCellExperiment(so) sceR <- readRDS("SCE.rds")
pred <- SingleR( sceT, sceR, sceR$LABEL, de.method = "wilcox", de.n = 50 )
so$pred <- pred$labels
head(sort(table(pred$labels), decreasing = TRUE))
to.remove <- is.na(pred$pruned.labels) table(Label = pred$labels, Removed = to.remove)
to.remove <- pruneScores(pred, min.diff.med = 0.2) table(Label = pred$labels, Removed = to.remove)
plotScoreHeatmap(pred)
plotDeltaDistribution(pred)
meta data include cell annotation LABEL.
library(SingleCellExperiment) library(scuttle) # logNormCounts()
count <- read.table("CountData") meta <- read.table("MetaData")
sce <- SingleCellExperiment( assays = list( counts = as.matrix (count)), colData = meta ) # Remove cells that do not belong to any Clusters (if needed) # For example: sce <- sce[,sce$LABEL != ""]
# IMPORTANT!: For SingleR, raw counts MUST be converted to logNorm data sce <- logNormCounts(sce)
saveRDS(sce, "SCE.rds")
suppressPackageStartupMessages(library(CellChat)) suppressPackageStartupMessages(library(Seurat))
so <- readRDS("so2.rds")
so2cc <- function( so, split.by = "idents", assay = "RNA", slot = "data", animal = "human", # mouse min.cells = 10 ){ # This function convert integrated seurat object with multiple conditions # to Cellchat object, folliwing by basic processing. # convert to cellchat object L <- SplitObject(so, split.by = split.by) cc <- list() for(i in 1:length(L)){ data <- GetAssayData(L[[i]], assay = assay, slot = slot) labels <- Idents(L[[i]]) meta <- data.frame(labels = labels, row.names = names(labels)) cellchat <- createCellChat(data, meta = meta, group.by = "labels") # basic processing of cellchat object if(animal == "mouse") { cellchat@DB <- CellChatDB.mouse } else { cellchat@DB <- CellChatDB.human } cellchat <- subsetData(cellchat) cellchat <- identifyOverExpressedGenes(cellchat) cellchat <- identifyOverExpressedInteractions(cellchat) cellchat <- computeCommunProb(cellchat) cellchat <- filterCommunication(cellchat, min.cells = min.cells) cellchat <- computeCommunProbPathway(cellchat) cellchat <- aggregateNet(cellchat) cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP") cc[[i]] <- cellchat } # merge names(cc) <- names(L) # output return(cc) }
cc <- so2cc(so, split.by = "src", animal = "mouse")
saveRDS(cc, "cc.rds")
suppressPackageStartupMessages(library(CellChat))
cc <- readRDS("cc.rds")
if(length(cc)!=1){ cellchat <- mergeCellChat(cc, add.names = names(cc)) } else { cellchat <- cc[[1]] } # cc is used later
netVisual_diffInteraction( cellchat, comparison = c(1 , 2), # 2nd vs 1st : up(red), down (blue) weight.scale = T, measure = "weight" )
netVisual_heatmap( cellchat, comparison = c(1, 2), measure = "weight" )
num.link <- sapply( cc, function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)} ) weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets gg <- list() for (i in 1:length(cc)) { gg[[i]] <- netAnalysis_signalingRole_scatter( cc[[i]], title = names(cc)[i], weight.MinMax = weight.MinMax) }
gg[[2]]
netAnalysis_signalingChanges_scatter( cellchat, comparison = c(1, 2), idents.use = "beta" )
netVisual_bubble( cellchat, sources.use = "beta", #targets.use = "alpha", comparison = c(1, 2), angle= 45 )
suppressPackageStartupMessages(library(CellChat))
cc <- readRDS("cc2.rds") if(length(cc)!=1){ cellchat <- mergeCellChat(cc, add.names = names(cc)) } else { cellchat <- cc[[1]] }
Ns = as.numeric(table(cellchat@idents)) M = cellchat@net$weight netVisual_circle( M, vertex.weight = Ns, weight.scale = T )
# Each Pairs for (i in 1:nrow(M)) { M2 <- matrix(0, nrow = nrow(M), ncol = ncol(M), dimnames = dimnames(M)) # size-matched empty matrix M2[i, ] <- M[i, ] netVisual_circle( M2, vertex.weight = Ns, weight.scale = T, edge.weight.max = max(M) # NEED ) }
# Signaling pathways showing significant communication cellchat@netP$pathways
sigP = c("VEGF") LR = extractEnrichedLR(cellchat, sigP) head(LR)
# show all the significant interactions (L-R pairs) # associated with certain signaling pathways netVisual_bubble( cellchat, signaling = sigP )
netVisual_aggregate( cellchat, sigP, layout = "circle" # or layout = "chord" )
# Circle plot (One L-R Pair) netVisual_individual(cellchat, sigP, pairLR.use = LR[2,], layout = "circle")
# Significant interactions (L-R pairs) # from some cell groups ('sources.use') to other cell groups ('targets.use') netVisual_bubble( cellchat, sources.use = "beta", #targets.use = c("alpha", "delta") )
plotGeneExpression(cellchat, signaling = sigP) # if you displayy all genes, specify enriched.only = FALSE
suppressPackageStartupMessages(library(Seurat)) suppressPackageStartupMessages(library(SeuratWrappers)) suppressPackageStartupMessages(library(monocle3))
so <- readRDS("*.rds")
cds <- as.cell_data_set(so) cds <- estimate_size_factors(cds) cds <- cluster_cells(cds, reduction_method = "UMAP") cds <- learn_graph(cds, use_partition = T)
plot_cells(cds, color_cells_by = "partition", label_principal_point = T, graph_label_size = 6, cell_size = .5)
plot_cells(cds, color_cells_by = "partition", graph_label_size = 6, cell_size = 1)
cds <- order_cells(cds, root_pr_nodes = c("Y_*"))
plot_cells(cds, color_cells_by = "pseudotime", show_trajectory_graph = T, graph_label_size = 6, cell_size = 1)
genes <- c("Col1a1") sub_cds <- cds[row.names(cds) %in% genes,]
plot_genes_in_pseudotime(sub_cds, label_by_short_name = F, color_cells_by = "seurat_clusters", min_expr = 0.1)