Seurat (九) hclust聚类分析 (二) by 细胞比例

发布于 2021-12-17  543 次阅读


第一步 读入数据

Seurat (十) 同群细胞在不同脑区的DEGs (二)第二步

第二步 构造相关计算函数

f_ggplot2_ti <- function(p, title, subtitle=''){
    if (subtitle == ''){
        (p + ggtitle(title) + theme(plot.title = element_text(hjust = 0.5)))
    }else{
        (p + ggtitle(title, subtitle=subtitle) + 
         theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5)))
    }
}
 
library(ggdendro)
f_ggdendrogram <- function(hc){
    ggdendrogram(hc$h, rotate = T, size = 3)+theme(axis.text = element_text(size=14,face = "bold"))
}
 
f_hc <- function(lc_counts){
    d <- dist(t(lc_counts))
    h <- hclust(d)
    list(d=d, h=h)
}
 
f_hc_cop <- function(hc){
    cop <- cophenetic(hc$h)
    cor(cop, hc$d)
}
 
 
f_cluster_averages <- function(lc_scRNA, lc_metaN='ident'){
    # 切分出Clusters
    lc_clusters <- SplitObject(lc_scRNA, split.by = lc_metaN)
    for (lc_i in 1:length(lc_clusters)){
        lc_clusters[[lc_i]]  <- lc_clusters[[lc_i]][[lc_clusters[[lc_i]]@active.assay]]@scale.data
    }
    for (lc_i in 1:length(lc_clusters)){
        lc_clusters[[lc_i]]  <- apply(lc_clusters[[lc_i]],1,mean)
    }
    lc_clusters <- data.frame(lc_clusters)
    scale(lc_clusters)
}

f_br_cluster_f <- function(sObject, lc_groupN){
    lc_filter <- unlist(unique(sObject[[lc_groupN]]))
    lc_filter <- lc_filter[!is.na(lc_filter)]
    lc_filter
}

f_br_cluster <- function(sObject, lc_groupN, lc_labelN, lc_prop = F){
    lc_all <- unique(sObject[[lc_labelN]])
    rownames(lc_all) <- lc_all[[1]]
    colnames(lc_all) <- "CB"
#     lc_tp  <- SplitObject(subset(x = sObject, !!sym(lc_groupN)%in%f_br_cluster_f(sObject, lc_groupN)), split.by = lc_groupN)
    lc_tp  <- SplitObject(sObject, split.by = lc_groupN)
    for(lc_i in 1:length(lc_tp)){
        if(lc_prop){
            lc_tp[[lc_i]] <- prop.table(table(lc_tp[[lc_i]][[lc_labelN]]))
        }else{
            lc_tp[[lc_i]] <- table(lc_tp[[lc_i]][[lc_labelN]])
        }
    }
    for(lc_name in names(lc_tp)){
        lc_all[[lc_name]] = 0
        lc_all[names(lc_tp[[lc_name]]), lc_name] = lc_tp[[lc_name]]
    }
    lc_all[,-1]
}

 

  • 相关性验证的代码
library(Hmisc)
library(ggcorrplot)
f_corrplot <- function(lc_scdata){
    lc_cor <- rcorr(lc_scdata)
    lc_cor$P[is.na(lc_cor$P)]=0
    ggcorrplot(lc_cor$r, type="full",hc.order = T, lab = T, p.mat = lc_cor$P)
}

 

第三步 (可选)筛选感兴趣的细胞群

n_ExN <- c('L4 IT','L5 IT','L5 ET','IT','L6b','L5/6 IT Car3','L6 IT','L2/3 IT','L5/6 NP','L6 IT Car3','L6 CT')
 
n_InN <- c('Lamp5','Pvalb','Sst','Vip','Sncg')
 
n_NoN <- c('Astro','PAX6','Endo','Micro-PVM','OPC','Oligo','Pericyte','VLMC')
 
n_groups <- list(NoN=n_NoN, ExN=n_ExN, InN=n_InN)
 
f_listUpdateRe <- function(lc_obj, lc_bool, lc_item){
    lc_obj[lc_bool] <- rep(lc_item,times=sum(lc_bool))
    lc_obj
}
 
f_grouplabel <- function(lc_meta.data, lc_groups){
    res <- lc_meta.data[[1]]
    for(lc_g in names(lc_groups)){
        lc_bool = (res %in% lc_groups[[lc_g]])
        for(c_n in colnames(lc_meta.data)){
            lc_bool = lc_bool | (lc_meta.data[[c_n]] %in% lc_groups[[lc_g]])
        }
        res <- f_listUpdateRe(res, lc_bool, lc_g)
    }
    names(res) <- rownames(lc_meta.data)
    res
}

scRNA_split[['n_groups']] <- f_grouplabel(scRNA_split[[c("hM1_hmca_class")]], n_groups)

sc_Neuron  <- subset(x = scRNA_split, n_groups %in% c("InN", "ExN"))

 

第四步 进行计算

  • by gene
test_N <- SplitObject(sc_Neuron, split.by = 'orig.ident')
test_N[['all']] <- sc_Neuron

listPN <- list()
for (n in names(test_N)){
    test = f_cluster_averages(test_N[[n]], "Region")
    hc <- f_hc(test)
    cop <- paste(n, hc %>% f_hc_cop())
    print(cop)
    listPN[[n]] <- (f_hc(test) %>% f_ggdendrogram() %>% f_ggplot2_ti(paste0('hclust by gene in ', n), subtitle = cop)) +
    (f_corrplot(test) %>% f_ggplot2_ti(paste0('cor by gene in ', n)))
}

 

  • by proportion of cells
test_s <- SplitObject(scRNA_split, split.by = 'orig.ident')
test_s[['all']] <- scRNA_split

listP <- list()
for (n in names(test_s)){
    test = as.matrix(f_br_cluster(test_s[[n]], 'Region', 'hM1_hmca_class', lc_prop = T))
    hc <- f_hc(test)
    cop <- paste(n, hc %>% f_hc_cop())
    print(cop)
    listP[[n]] <- (f_hc(test) %>% f_ggdendrogram() %>% f_ggplot2_ti(paste0('hclust by proportion of cells in ', n), subtitle = cop)) +
    (f_corrplot(test) %>% f_ggplot2_ti(paste0('cor by proportion of cells of ', n)))
}

 

第五步 输出计算结果

# 配置数据和mark基因表的路径
root_path = "~/zlliu/R_data/hBLA"
 
# 配置结果保存路径
output_path = "~/zlliu/R_output/21.12.17.pcell_hclust"
if (!file.exists(output_path)){dir.create(output_path)}
 
# 设置工作目录,输出文件将保存在此目录下
setwd(output_path)
getwd()

library(export)
f_ppt_output <- function(fileName, image, lc_aspectr=1.5){
    for (p in image){
        graph2ppt(x=p, file=fileName, width=12, aspectr=lc_aspectr, append = TRUE, vector.graphic=F)
    }
}

f_ppt_output('split.pptx', listP, lc_aspectr = 2)
f_ppt_output('sc_Neuron.pptx', listPN, lc_aspectr = 2)

 


医学生