前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >用V5版本Seurat做单细胞数据文献复现

用V5版本Seurat做单细胞数据文献复现

作者头像
生信菜鸟团
发布2024-01-19 18:23:43
1.3K0
发布2024-01-19 18:23:43
举报
文章被收录于专栏:生信菜鸟团生信菜鸟团

上周群里的小伙伴想让我复现一篇单细胞文献数据,看了一下是比较基础的分析流程。想到自己之前也没完整展示用V5版本的Seurat的分析流程,所以就以此当示例数据来分享以下这篇复现流程。

V5和V4的代码区别主要在前期导入数据和其中的数据有些许改变,曾老师在之前的几篇推文还有直播中都有提到。

例如:

v4: sce.all@assays$RNA@counts;

v5: sce.all@assaysRNAcounts / sce.all@assaysRNA@layerscounts。

当然harmony的整合方式也有改变,如下代码所示,从官网上copy过来的代码。但是本周推文并没有修改此处的代码。

代码语言:javascript
复制
# load in the pbmc systematic comparative analysis dataset
obj <- LoadData("pbmcsca")
obj <- subset(obj, nFeature_RNA > 1000)
obj <- RunAzimuth(obj, reference = "pbmcref")
# currently, the object has two layers in the RNA assay: counts, and data
obj
obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Method)
obj
obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj)
obj <- ScaleData(obj)
obj <- RunPCA(obj)
obj <- RunUMAP(obj, dims = 1:30, reduction = "pca", reduction.name = "umap.unintegrated")
# visualize by batch and cell type annotation
# cell type annotations were previously added by Azimuth
DimPlot(obj, reduction = "umap.unintegrated", group.by = c("Method", "predic.celltype.l2"))

下面先让我简要介绍一下这篇文献。

紫外线照射皮肤的单细胞 RNA 序列分析揭示了与光照相关的炎症和维生素 D 的保护作用 。

本研究通过单细胞测序对紫外线照射后的小鼠皮肤进行了研究。观察到紫外线照射后的小鼠皮肤主要诱发成纤维细胞炎症,并显示出不同的基因表达。

要复现的图:

image.png

step1~ step4:导入数据 - 降维聚类分群
代码语言:javascript
复制
###如何使用安装好的v5###
#使用的时候加载v5路径
.libPaths(c(
  '/home/data/t140333/seurat_v5/', 
  "/home/data/t140333/R/x86_64-pc-linux-gnu-library/4.3",
  "/usr/local/lib/R/library" 
))
getwd()
#再次检测所用的Seurat版本
packageVersion("Seurat")

#setwd("../")
rm(list=ls())
options(stringsAsFactors = F) 
source('scRNA_scripts/lib.R')

###### step1:导入数据 ######   
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE173385
dir='GSE173385_raw/' 
samples=list.files( dir ,pattern = 'gz')
samples 
library(data.table)
ctList = lapply(samples,function(pro){ 
  # pro=samples[1] 
  print(pro)
  ct=fread(file.path( dir ,pro),data.table = F)
  ct[1:4,1:4]
  rownames(ct)=ct[,1]
  colnames(ct) = paste(gsub('_matrix.tsv.gz','',pro),
                       colnames(ct) ,sep = '_')
  ct=ct[,-1] 
  return(ct)
})

#检查行列数目,目的是找交集基因-----
lapply(ctList, dim)
tmp =table(unlist(lapply(ctList, rownames)))
head(tmp)
cg = names(tmp)[tmp==length(samples)]
head(cg)

#合并矩阵-----
bigct = do.call(cbind,
                lapply(ctList,function(ct){ 
                  ct = ct[cg,] 
                  return(ct)
                }))
dim(bigct)

#报错需更新Matrix版本
# Error in validObject(.Object) : 
#   invalid class “LogMap” object: superclass "mMatrix" not defined in the environment of the object's class

#library(Matrix)
sce.all=CreateSeuratObject(counts = bigct, 
                           min.cells = 5,
                           min.features = 300)
sce.all
as.data.frame(sce.all@assays$RNA$counts[1:10, 1:2])
head(sce.all@meta.data, 10)
table(sce.all@meta.data$orig.ident) 
zz<-as.data.frame(sce.all@assays$RNA$counts[1:1000, 1:2])
getwd()
###### step2:QC质控 ######
dir.create("./1-QC")
setwd("./1-QC")
# 如果过滤的太狠,就需要去修改这个过滤代码
source('../scRNA_scripts/qc.R')
sce.all.filt = basic_qc(sce.all)
print(dim(sce.all))
print(dim(sce.all.filt))
setwd('../')

#如果你的数据集是human要修改sp="human"
sp='mouse'
# install.packages("Matrix", type = "source")
# install.packages("irlba", type = "source")
###### step3: harmony整合多个单细胞样品 ######
dir.create("2-harmony")
getwd()
setwd("2-harmony")
source('../scRNA_scripts/harmony.R')
# 默认 ScaleData 没有添加"nCount_RNA", "nFeature_RNA"
# 默认的
sce.all.int = run_harmony(sce.all.filt)
setwd('../')

###### step4: 降维聚类分群和看标记基因库 ######
# 原则上分辨率是需要自己肉眼判断,取决于个人经验
# 为了省力,我们直接看  0.1和0.8即可
table(Idents(sce.all.int))
table(sce.all.int$seurat_clusters)
table(sce.all.int$RNA_snn_res.0.1) 
table(sce.all.int$RNA_snn_res.0.8) 

getwd()
dir.create('check-by-0.1')
setwd('check-by-0.1')
sel.clust = "RNA_snn_res.0.1"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident) 
source('../scRNA_scripts/check-all-markers.R')
setwd('../') 
getwd()

dir.create('check-by-0.8')
setwd('check-by-0.8')
sel.clust = "RNA_snn_res.0.8"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident) 
source('../scRNA_scripts/check-all-markers.R')
setwd('../') 
getwd()

last_markers_to_check
step5: 确定单细胞亚群生物学名字
代码语言:javascript
复制
###### step5: 确定单细胞亚群生物学名字 ######
# 一般来说,为了节省工作量,我们选择0.1的分辨率进行命名
# 因为命名这个步骤是纯人工 操作
# 除非0.1确实分群太粗狂了,我们就选择0.8 
source('scRNA_scripts/lib.R')
#sce.all.int = readRDS('2-harmony/sce.all_int.rds')
colnames(sce.all.int@meta.data) 
table(sce.all.int$RNA_snn_res.0.8)
pdf('orig.ident-vs-RNA_snn_res.0.1.pdf')
gplots::balloonplot(table(sce.all.int$RNA_snn_res.0.1,sce.all.int$orig.ident))
dev.off()
#文章给出的marker gene
# keratinocytes (Krt5, Krt14), 
# hair follicle cells (Krt17, Krt79, Sox9, HFCs), 
# fibroblasts (Col1a1, Dcn, Lum), 
# myeloid (Cd74, Lyz2), 
# sebaceous gland cells (Mgst1, Krt25, Pparg), 
# T cells (Cd3d, Nkg7),
# endothelial cells (Mgp, Fabp4, ECs), 
# melanocytes (Mlana, Pmel) 
#文章中把Keratinocytes细分成多个亚群,我这里就统称为一个了,所以细胞亚群命名会存在与原文有一定的差异~
# 5,6,0,2,3,4,1,8,7,18,12,14,20 Keratinocytes
# 15,13:Fibroblast
# 22:Endo
# 11,17:macrophage
# 16:T
# 23:melanocytes
# 9,10:Hair follicle cell
# 19:sebaceous
pdf('orig.ident-vs-RNA_snn_res.0.1.pdf')
gplots::balloonplot(table(sce.all.int$RNA_snn_res.0.1,sce.all.int$orig.ident))
dev.off()
setwd("../")
getwd()
if(T){
  sce.all.int
  celltype=data.frame(ClusterID=0:23 ,
                      celltype= 0:23 ) 
  #定义细胞亚群        
  celltype[celltype$ClusterID %in% c( 5,6,0,2,3,4,1,8,7,18,12,14,20,21 ),2]='Keratinocytes'
  celltype[celltype$ClusterID %in% c( 15,13 ),2]='Fibroblast'   
  celltype[celltype$ClusterID %in% c( 22 ),2]='Endo'  
  celltype[celltype$ClusterID %in% c( 11,17 ),2]='macrophage'
  celltype[celltype$ClusterID %in% c( 16),2]='T'   
  celltype[celltype$ClusterID %in% c( 23 ),2]='melanocytes'  
  celltype[celltype$ClusterID %in% c( 9,10 ),2]='Hair follicle cell'   
  celltype[celltype$ClusterID %in% c( 19 ),2]='sebaceous'    
  head(celltype)
  celltype
  table(celltype$celltype)
  sce.all.int@meta.data$celltype = "NA"
  
  for(i in 1:nrow(celltype)){
    sce.all.int@meta.data[which(sce.all.int@meta.data$RNA_snn_res.0.8 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
  Idents(sce.all.int)=sce.all.int$celltype
  
  table( Idents(sce.all.int))
  
  
  sel.clust = "celltype"
  sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
  table(sce.all.int@active.ident) 
  
  dir.create('check-by-celltype')
  setwd('check-by-celltype')
  source('../scRNA_scripts/check-all-markers.R')
  setwd('../') 
  getwd()
}

image.png

step6:热图可视化
代码语言:javascript
复制
getwd()
#step6:热图可视化
GSE173385_selected_genes

DoHeatmap(subset(sce.all.int,downsample=100),GSE173385_selected_genes,size=3)+
  scale_fill_gradientn(colors = c("#4DBBD5","white","#E64B35"))
ggsave(filename=paste0(pro,'DoHeatmap_check_GSE173385_markers_by_clusters.pdf') ,
       width=10,height=8)

library(stringr)
#图片转换https://ocr.wdku.net/index_pictranslation
gene<-c("Clca3a2 Ifi202b Krt77 Alox12e Mt4 Krt14 Krt5 Wfdc21 Serpina12 Krt79 Defb6 BC100530 Flg Lcelm Lcelc Cxcll Ccl2 Cd74 H2-Aa Colla2 Collal Atf3 Fos Ctla2a Rgsl Retnla Ccl8 Hist1h1b Krt71 S100a3 Mgstl Scdl Cd207 Mfge8 Mgp Tm4sfl Mlana Dct")
gene2<-as.data.frame(strsplit(gene," ",","))
colnames(gene2)<-"gene"
p1 <- DotPlot(sce.all.int, features = gene2$gene )  + coord_flip()+theme(axis.text.x=element_text(angle=45,hjust = 1)) 
p1
DoHeatmap(subset(sce.all.int,downsample=100),gene2$gene,size=3)+
  scale_fill_gradientn(colors = c("#4DBBD5","white","#E64B35"))
ggsave(filename='gene_DoHeatmap_check_GSE173385_markers_by_clusters.pdf' ,
       width=10,height=8)

因为篇幅有限,在此只展示check-all-markers.R 的部分代码,不过跑出来这篇文献的结果下面这些marker gene差不多够用了,因为还补充了文献给出的marker gene。

代码语言:javascript
复制
GSE173385_selected_genes=  c("Krt5", "Krt14",
                   "Krt17", "Krt79",
                   "Sox9", "HFCs",
                   "Col1a1","Dcn",
                   "Lum","Cd74","Lyz2",
                   "Mgst1","Krt25","Pparg",
                   "Cd3d","Nkg7","Mgp","Fabp4",
                   "ECs","Mlana",
                   "Pmel")

gastric_cancer_markers = c('PTPRC', 
                   'MUC2' , 'ITLN1',
                   'FABP1' , 'APOA1',
                   'CEACAM5' , 'CEACAM6',
                   'EPCAM', 'KRT18', 'MUC1',
                   'MUC6' , 'TFF2',
                   'PGA4' , 'PGA3',
                   'MUC5AC' , 'TFF1','CHGA' , 'CHGB') 
Myo=c("Krt17", "Krt14", "Krt5", "Acta2", "Myl9", "Mylk", "Myh11")
Lum=c("Krt19", "Krt18", "Krt8")
Hs=c("Prlr", "Cited1", "Pgr", "Prom1", "Esr1")  
AV=c("Mfge8", "Trf", "Csn3", "Wfdc18", "Elf5", "Ltf")
Lp=c("Kit", "Aldh1a3", "Cd14")
Fib=c("Col1a1", "Col1a2", "Col3a1", "Fn1")
GSE150580_breast_cancer_markers_list =list(
  Myo=Myo,
  Lum=Lum,
  Hs=Hs, 
  AV=AV,
  Lp=Lp,  
  Fib=Fib 
  
)  

last_markers = c('PTPRC', 'CD3D', 'CD3E', 'CD4','CD8A',
                   'CD19', 'CD79A', 'MS4A1' ,
                   'IGHG1', 'MZB1', 'SDC1',
                   'CD68', 'CD163', 'CD14', 
                   'TPSAB1' , 'TPSB2',  # mast cells,
                   'RCVRN','FPR1' , 'ITGAM' ,
                   'C1QA',  'C1QB',  # mac
                   'S100A9', 'S100A8', 'MMP19',# monocyte
                   'FCGR3A',
                   'LAMP3', 'IDO1','IDO2',## DC3 
                   'CD1E','CD1C', # DC2
                   'KLRB1','NCR1', # NK 
                   'FGF7','MME', 'ACTA2', ## human  fibo 
                 'GJB2', 'RGS5',
                   'DCN', 'LUM',  'GSN' , ## mouse PDAC fibo 
                   'MKI67' , 'TOP2A', 
                   'PECAM1', 'VWF',  ## endo 
                 "PLVAP",'PROX1','ACKR1','CA4','HEY1',
                   'EPCAM' , 'KRT19','KRT7', # epi 
                   'FYXD2', 'TM4SF4', 'ANXA4',# cholangiocytes
                   'APOC3', 'FABP1',  'APOA1',  # hepatocytes
                   'Serpina1c',
                   'PROM1', 'ALDH1A1' )
GSE173385_selected_genes
gastric_cancer_markers 
last_markers 




markers = c("GSE173385_selected_genes",'gastric_cancer_markers'
            'last_markers' )
p_umap=DimPlot(sce.all.int, reduction = "umap",label = T,repel = T) 
p_umap 

if(sp=='human'){
   lapply(markers, function(x){
     #x=markers[1]
     genes_to_check=str_to_upper(get(x)) 
     DotPlot(sce.all.int , features = genes_to_check )  + 
       coord_flip() + 
       theme(axis.text.x=element_text(angle=45,hjust = 1))
     
     h=length( genes_to_check )/6+3;h
     ggsave(paste('check_for_',x,'.pdf'),height = h)
   })
  lapply(markers_list, function(x){
    # x=markers_list[1]
    genes_to_check = lapply(get(x), str_to_upper)
    dup=names(table(unlist(genes_to_check)))[table(unlist(genes_to_check))>1]
    genes_to_check = lapply(genes_to_check, function(x) x[!x %in% dup])
  
    DotPlot(sce.all.int , features = genes_to_check )  + 
     # coord_flip() + 
      theme(axis.text.x=element_text(angle=45,hjust = 1))
    
    w=length( unique(unlist(genes_to_check)) )/5+6;w
    ggsave(paste('check_for_',x,'.pdf'),width  = w)
  })
  
  last_markers_to_check <- str_to_upper(last_markers ) 

 }else if(sp=='mouse'){
   lapply(markers, function(x){
     #x=markers[1]
     genes_to_check=str_to_title(get(x)) 
     DotPlot(sce.all.int , features = genes_to_check )  + 
       coord_flip() + 
       theme(axis.text.x=element_text(angle=45,hjust = 1))
     
     h=length( genes_to_check )/6+3;h
     ggsave(paste('check_for_',x,'.pdf'),height = h)
   })
   lapply(markers_list, function(x){
     # x=markers_list[1]
     genes_to_check = lapply(get(x), str_to_title)
     dup=names(table(unlist(genes_to_check)))[table(unlist(genes_to_check))>1]
     genes_to_check = lapply(genes_to_check, function(x) x[!x %in% dup])
     
     DotPlot(sce.all.int , features = genes_to_check )  + 
       # coord_flip() + 
       theme(axis.text.x=element_text(angle=45,hjust = 1))
     
     w=length( unique(unlist(genes_to_check)) )/5+6;w
     ggsave(paste('check_for_',x,'.pdf'),width  = w)
   })
   
   last_markers_to_check <<- str_to_title(last_markers ) 
}else {
  print('we only accept human or mouse')
} 

p_all_markers = DotPlot(sce.all.int , features = last_markers_to_check )  + 
  coord_flip() + 
  theme(axis.text.x=element_text(angle=45,hjust = 1)) 
p_all_markers+p_umap
h=length( last_markers_to_check )/6+3;h
w=length( unique( Idents(sce.all.int)) )/5+10;w
ggsave(paste('last_markers_and_umap.pdf'),width  = w,height = h)
本文参与?腾讯云自媒体分享计划,分享自微信公众号。
原始发表:2024-01-18,如有侵权请联系?cloudcommunity@tencent.com 删除

本文分享自 生信菜鸟团 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与?腾讯云自媒体分享计划? ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • step1~ step4:导入数据 - 降维聚类分群
  • step5: 确定单细胞亚群生物学名字
  • step6:热图可视化
相关产品与服务
灰盒安全测试
腾讯知识图谱(Tencent Knowledge Graph,TKG)是一个集成图数据库、图计算引擎和图可视化分析的一站式平台。支持抽取和融合异构数据,支持千亿级节点关系的存储和计算,支持规则匹配、机器学习、图嵌入等图数据挖掘算法,拥有丰富的图数据渲染和展现的可视化方案。
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档
http://www.vxiaotou.com