scRNA-seq


SECTION 1:10X Genomics

シングルセル解析

解析機器: Chromium iX/X (新型・スケーラブル)| Chromium Controller(旧型)| Chromium Connect
試薬キット類: Single Cell Gene Expression| Single Cell Immune Profiling| Single Cell Multiome ATAC + Gene Expression| Single Cell ATAC
データ解析ソフトウェア: Cell Ranger (遺伝子発現・免疫細胞)| Cell Ranger ATAC (ATAC-seq)| Cell Ranger ARC (遺伝子発現+ATAC-seq)

空間遺伝子発現解析

解析機器: Xenium Analyzer| Visium CytAssyst
データ解析ソフトウェア: Xenium Explorer| Space Ranger

Link

SECTION 2:Cell Ranger

Tutorials一覧

Cell Rangerソフトウェアやヒト・マウスのpre-buildリファレンスデータを Downloadsサイトから入手する.
次に Installガイドに従い、ソフトウェアをインストールする.
必要に応じて cellanger mkrefコマンドにより、カスタムリファレンスを作成する.
cellranger countコマンドにより解析(Mappingとカウント)を実行する.

解析実施例

  #@ COUNT / include_intron (default)
  homd=/home/tom/.cellranger
  mus=$homd/refdata-gex-mm10-2020-A
  hs=$homd/refdata-gex-GRCh38-2020-A

  dir_in=AGED
  dir_out=MAP
  prefix=Aged
  cpu=10
  memory=128
  
  cellranger count       \
    --id=$dir_out        \
    --fastqs=$dir_in     \
    --sample=$prefix     \
    --transcriptome=$mus \
    --localcores=$cpu    \
    --localmem=$memory

カスタムリファレンスの作成例

  #@ custom reference
  # Get GTF/GENOME from ENSEMBL and uncompress
  # Filter GTF
  cellranger mkgtf \
    a.gtf          \
    out.gtf        \
    --attribute=gene_biotype:protein_coding

  # Make Ref
  cellranger mkref  \
    --genome=outdir \
    --fasta=g.fa    \
    --genes=out.gtf

SECTION 3:Space Ranger

spaceranger count

SECTION 4:Seurat

Tutorials| Cheat Sheet| API| FAQ

load library

  library(Seurat)
  library(ggplot2)

Create SO

CreateSeuratObject| Read10X
  so <- CreateSeuratObject(Read10X("CELLRANGER_DIR"), min.cells = 3)

Metrics

PercentageFeatureSet| VlnPlot
  so[["percent.mt"]] <- PercentageFeatureSet(so, pattern="^mt-")
  VlnPlot(so, features=c("nFeature_RNA", "nCount_RNA", "percent.mt"))

Subset

subset
  so <- subset(so, subset=nCount_RNA > 1000 & percent.mt > 20)

Norm / HVGs / Scale

NormalizeData FindVariableFeatures ScaleData
  so <- NormalizeData(so)
  so <- FindVariableFeatures(so, selection.method="vst", nfeatures=2000)
  so <- ScaleData(so, features = rownames(so))

PCA

  so <- RunPCA(so, features = VariableFeatures(object = so))
  ElbowPlot(so)

UMAP

  so <- FindNeighbors(so, dims = 1:20)
  so <- FindClusters(so, resolution = 0.5)
  so <- RunUMAP(so, dims = 1:20)

Plot

  DimPlot(so, reduction = "umap", label = TRUE, pt.size = .8, label.size = 8)
  VlnPlot(so, features=c("INS", "GCG"))
  FeaturePlot(so, features=c("INS", "GCG"))

DEGs

  FindMarkers(so, ident.1 = "0", ident.2 = c("1", "2"))

Save

  saveRDS(so, file = "so.rds")

SECTION 5:DoubletFinder

INSTALL

conda install -c conda-forge r-remotes
remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')

LOAD

library(Seurat)
require(DoubletFinder)

INPUT

以下のデータはprefiltering, scaling, PCA, UMAP等を解析済みもの
Aggregateしたデータは使用しない (ex. Ctrlデータ + 介入データ)

  so <- readRDS("tmp_a.rds")

PK identification (Ground Truthを使用しない場合)

  so_tmp_1 <- paramSweep_v3(so, PCs = 1:16)
  so_tmp_2 <- summarizeSweep(so_tmp_1, GT = FALSE)
  bcmvn <- find.pK(so_tmp_2)
  barplot(bcmvn$BCmetric, names.arg=bcmvn$pK,las = 2)

    # Bcmvn ~ Mean-variance normalized bimodality coefficient
    # 下図のMax値である0.02を次式のpKに設定

Find Doublets

  # Set Doublets numbers
  nExp <- round(ncol(so) * 0.10)

    # doubletsを10%と仮定
    # 10x社の細胞インプット数と回収数の対応表を参照して決定

  so <- doubletFinder_v3(so,
                         pN = 0.25,
                         pK = 0.02,
                         nExp = nExp,
                         PCs = 1:16)

    # PCs ~ The number of statistically-significant principal components
    #
    # pN ~ This defines the number of generated artificial doublets,
    # expressed as a proportion of the merged real-artificial data.
    # Default is set to 25%, based on observation that
    # DoubletFinder performance is largely pN-invariant
    #
    # pK ~ This defines the PC neighborhood size used to compute pANN,
    # expressed as a proportion of the merged real-artificial data.
    # No default is set, as pK should be adjusted for each scRNA-seq dataset.
    #
    # nExp ~ This defines the pANN threshold used to make final doublet/singlet predictions.

Rename DF.name and Visualize

  DF.name = colnames(so@meta.data)[grepl("DF.classification", colnames(so@meta.data))]
  DimPlot(so, group.by = DF.name, pt.size = 1.2)
  VlnPlot(so,features = "nFeature_RNA", group.by = DF.name, pt.size = 1.2)

SUBSET

  so_filtered = so[,so@meta.data[,DF.name] == "Singlet"]

SECTION 6:SingleR

Case1: Reference is NOT scRNA-seq data (for example: HumanPrimaryCell Atlas)

In the folling example, Test data is Seurat object and reference data is SingleCellExperiment object. Cell annotation is stocked at LABEL column of reference data.

Load Library

  library(SingleR)
  library(SingleCellExperiment)
  library(Seurat)

Input data

  so   <- readRDS("SO.rds")
  sceT <- as.SingleCellExperiment(so)
  sceR <- readRDS("SCE.rds")

SingleR

  pred <- SingleR(
                   sceT, 
                   sceR, 
                   sceR$LABEL, 
                   assay.type.test = 1
  )

Transfer the result to seurat object

  so$pred <- pred$labels

Case2: Reference is scRNA-seq data

In the folling example, Test data is Seurat object and reference data is SingleCellExperiment object. Cell annotation is stocked at LABEL column of reference data.

Load Library

  library(SingleR)
  library(SingleCellExperiment)
  library(Seurat)
  library(scran)

input data

  so   <- readRDS("SO.rds")
  sceT <- as.SingleCellExperiment(so)
  sceR <- readRDS("SCE.rds")

SingleR

  pred <- SingleR(
                     sceT, 
                     sceR, 
                     sceR$LABEL,
                     de.method = "wilcox",
                     de.n = 50
  )

Transfer the result to seurat object

  so$pred <- pred$labels

Explore the Result

Table format 1

  head(sort(table(pred$labels), decreasing = TRUE))

Table format 2 (no NA data)

  to.remove <- is.na(pred$pruned.labels)
  table(Label = pred$labels, Removed = to.remove)

Table format 3 (remove data by the specified threshold)

  to.remove <- pruneScores(pred, min.diff.med = 0.2)
  table(Label = pred$labels, Removed = to.remove)

Visualization 1

  plotScoreHeatmap(pred)

Visualization 2

  plotDeltaDistribution(pred)

Make SCE object from Count matrix for SingleR

meta data include cell annotation LABEL.

Load Library

  library(SingleCellExperiment)
  library(scuttle)  # logNormCounts()

input data

  count <- read.table("CountData")
  meta  <- read.table("MetaData")

Make SingleCellExperiment object from count Matrix

  sce <- SingleCellExperiment(
    assays  = list( counts = as.matrix (count)),
    colData = meta
  )

    # Remove cells that do not belong to any Clusters (if needed)
    # For example: sce <- sce[,sce$LABEL != ""]

logNorm Conversion

  # IMPORTANT!: For SingleR, raw counts MUST be converted to logNorm data
  sce <- logNormCounts(sce)

Save

  saveRDS(sce, "SCE.rds")

SECTION 7:CellChat

CellChat Processing

Load Packages

  suppressPackageStartupMessages(library(CellChat))
  suppressPackageStartupMessages(library(Seurat))

Load Seurat Object

  so <- readRDS("so2.rds")

Define "so2cc" function

  so2cc <- function(
      so, 
      split.by  = "idents",
      assay     = "RNA",
      slot      = "data",
      animal    = "human",   # mouse
      min.cells = 10
  ){

    # This function convert integrated seurat object with multiple conditions 
    # to Cellchat object, folliwing by basic processing.
    
    # convert to cellchat object 
    L <- SplitObject(so, split.by = split.by)

    cc <- list()
    for(i in 1:length(L)){
      data     <- GetAssayData(L[[i]], assay = assay, slot = slot)         
      labels   <- Idents(L[[i]])
      meta     <- data.frame(labels = labels, row.names = names(labels)) 
      cellchat <- createCellChat(data, meta = meta, group.by = "labels")

    # basic processing of cellchat object
      if(animal == "mouse") { 
        cellchat@DB <- CellChatDB.mouse }
      else {
        cellchat@DB <- CellChatDB.human 
      }
      cellchat <- subsetData(cellchat)
      cellchat <- identifyOverExpressedGenes(cellchat)
      cellchat <- identifyOverExpressedInteractions(cellchat)

      cellchat <- computeCommunProb(cellchat)

      cellchat <- filterCommunication(cellchat, min.cells = min.cells)
      cellchat <- computeCommunProbPathway(cellchat)
      cellchat <- aggregateNet(cellchat)

      cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")
      cc[[i]]  <- cellchat
    }

    # merge
    names(cc) <- names(L)

    # output
    return(cc)
  }

convert Seurat object to CellChat object by so2cc

  cc <- so2cc(so, split.by = "src", animal = "mouse")
  saveRDS(cc, "cc.rds")

Visual1

Load Packages

  suppressPackageStartupMessages(library(CellChat))

Load Data

  cc <- readRDS("cc.rds")

From cc list to cc object

  if(length(cc)!=1){
    cellchat <- mergeCellChat(cc, add.names = names(cc))
  } else {
    cellchat <- cc[[1]]
  }

  # cc is used later
  netVisual_diffInteraction(
    cellchat,
    comparison = c(1 , 2),   # 2nd vs 1st : up(red), down (blue)
    weight.scale = T,
    measure = "weight"
  )
  netVisual_heatmap(
    cellchat,
    comparison = c(1, 2),
    measure = "weight"
  )
  num.link <- sapply(
    cc, 
    function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)}
  )

  weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets

  gg <- list()
  for (i in 1:length(cc)) {
    gg[[i]] <- netAnalysis_signalingRole_scatter(
      cc[[i]],
      title = names(cc)[i], 
      weight.MinMax = weight.MinMax)
  }

  gg[[2]]
  netAnalysis_signalingChanges_scatter(
    cellchat,
     comparison = c(1, 2), 
    idents.use = "beta"
  )
  netVisual_bubble(
    cellchat, 
    sources.use = "beta",
    #targets.use = "alpha",
    comparison = c(1, 2),
    angle= 45
  )

Visual2

Load Packages

  suppressPackageStartupMessages(library(CellChat))

Load Data

  cc <- readRDS("cc2.rds")

  if(length(cc)!=1){
    cellchat <- mergeCellChat(cc, add.names = names(cc))
  } else {
    cellchat <- cc[[1]]
  }
  Ns = as.numeric(table(cellchat@idents))
  M  =  cellchat@net$weight

  netVisual_circle(
    M,
    vertex.weight = Ns, 
    weight.scale  = T 
  )
  # Each Pairs
  for (i in 1:nrow(M)) {
    M2 <- matrix(0, nrow = nrow(M), ncol = ncol(M), dimnames = dimnames(M))   # size-matched empty matrix
    M2[i, ] <- M[i, ]
  
    netVisual_circle(
      M2, 
      vertex.weight = Ns, 
      weight.scale = T, 
      edge.weight.max = max(M)  # NEED
    )
  }
  # Signaling pathways showing significant communication
  cellchat@netP$pathways
  sigP = c("VEGF")
  LR = extractEnrichedLR(cellchat, sigP)

  head(LR)
  # show all the significant interactions (L-R pairs) 
  # associated with certain signaling pathways
  netVisual_bubble(
    cellchat, 
    signaling = sigP
  )
  netVisual_aggregate(
    cellchat,  
    sigP,
    layout = "circle"    # or layout = "chord"
  )

  # Circle plot (One L-R Pair)
  netVisual_individual(cellchat, sigP, pairLR.use = LR[2,], layout = "circle")
  # Significant interactions (L-R pairs) 
  # from some cell groups ('sources.use') to other cell groups ('targets.use')
  netVisual_bubble(
    cellchat, 
    sources.use = "beta", 
    #targets.use = c("alpha", "delta")
  )
  plotGeneExpression(cellchat, signaling = sigP)  # if you displayy all genes, specify enriched.only = FALSE

Monocle3

  suppressPackageStartupMessages(library(Seurat))
  suppressPackageStartupMessages(library(SeuratWrappers))
  suppressPackageStartupMessages(library(monocle3))
  so  <- readRDS("*.rds")
  cds <- as.cell_data_set(so)
  cds <- estimate_size_factors(cds)
  cds <- cluster_cells(cds, reduction_method = "UMAP") 
  cds <- learn_graph(cds, use_partition = T)
  plot_cells(cds, 
             color_cells_by = "partition", 
             label_principal_point = T,
             graph_label_size = 6, 
             cell_size = .5)
  plot_cells(cds, 
             color_cells_by = "partition", 
             graph_label_size = 6, 
             cell_size = 1)
  cds <- order_cells(cds, root_pr_nodes = c("Y_*"))
  plot_cells(cds,
             color_cells_by = "pseudotime",
             show_trajectory_graph = T,
             graph_label_size  = 6,
             cell_size = 1)
  genes <- c("Col1a1")
  sub_cds <- cds[row.names(cds) %in% genes,]
    plot_genes_in_pseudotime(sub_cds, 
                             label_by_short_name = F,
                             color_cells_by = "seurat_clusters", 
                             min_expr = 0.1)