SingleR (一) 参考数据集的构建方法

发布于 2021-09-20  987 次阅读


第一步 下载所需的参考数据集(以下命令仅下载 SMART-seq 数据集;第二步中用到的 human_M1_10x 数据集需按同样方式另行下载)

# Create the data directory (no error if it already exists, so the snippet can
# be re-run) and fetch the count matrix and the cell metadata into it.
# `wget -c` resumes a partially downloaded file instead of starting over and
# saving a second copy next to it.
mkdir -p Human_Multiple_Cortical_Areas_SMART-seq && cd Human_Multiple_Cortical_Areas_SMART-seq
wget -c https://idk-etl-prod-download-bucket.s3.amazonaws.com/aibs_human_ctx_smart-seq/matrix.csv
wget -c https://idk-etl-prod-download-bucket.s3.amazonaws.com/aibs_human_ctx_smart-seq/metadata.csv

第二步 读入下载的参考数据集与注释

# Read the metadata table and the count table of both reference data sets.
#
# The count tables are read with `check.names = FALSE`: by default read.csv()
# passes the header through make.names(), which rewrites any column name that
# is not a syntactic R name (e.g. "HLA-A" becomes "HLA.A", a leading digit gets
# an "X" prefix). The header of matrix.csv ends up as the row names of the
# final objects, so the original spelling has to survive.
# NOTE(review): presumably those header entries are gene symbols - confirm
# against the downloaded files.
ref_meta <- read.csv("/home/rqzhang/zlliu/R_data/human_M1_10x/metadata.csv")
ref_counts <- read.csv(
  "/home/rqzhang/zlliu/R_data/human_M1_10x/matrix.csv",
  check.names = FALSE
)
ref2_meta <- read.csv("/home/rqzhang/zlliu/R_data/Human_Multiple_Cortical_Areas_SMART-seq/metadata.csv")
ref2_counts <- read.csv(
  "/home/rqzhang/zlliu/R_data/Human_Multiple_Cortical_Areas_SMART-seq/matrix.csv",
  check.names = FALSE
)

第三步 转置矩阵

#' Transpose a count table so that its rows become columns.
#'
#' The first column of `counts` holds the identifiers that become the column
#' names of the result; every remaining column becomes one row, named after
#' the original column.
#'
#' The identifier column is split off *before* the matrix conversion. Calling
#' t() on the whole data.frame would coerce every value to character (because
#' of that one text column), which is what made the transposition take many
#' minutes and required re-parsing every value with as.integer() afterwards.
#'
#' The result is built with as.data.frame() on a matrix, which keeps the
#' column names exactly as given. Going through as.data.frame(lapply(...))
#' instead would run them through make.names() and rewrite identifiers that
#' contain characters such as "-".
#'
#' @param counts A data.frame: first column = identifiers, remaining columns =
#'   counts.
#' @return A data.frame of integer columns, one per row of `counts`, with one
#'   row per count column of `counts`.
transpose_counts <- function(counts) {
  stopifnot(is.data.frame(counts), ncol(counts) >= 2L)

  ids <- as.character(counts[[1]])
  mat <- as.matrix(counts[, -1, drop = FALSE])
  # Counts are stored as integers, as before; this is a no-op when read.csv()
  # already produced integer columns.
  storage.mode(mat) <- "integer"
  rownames(mat) <- ids

  as.data.frame(t(mat))
}

# Transpose both references and report the elapsed time of each step.
# The intermediate character matrices (ref_t_counts / ref2_t_counts) are no
# longer created.
now_t <- Sys.time()
ref_d_counts <- transpose_counts(ref_counts)
Sys.time() - now_t

now_t <- Sys.time()
ref2_d_counts <- transpose_counts(ref2_counts)
Sys.time() - now_t

第四步 加载程辑包,构建 SummarizedExperiment 对象并进行对数归一化

library(SummarizedExperiment)
library(scater)

# Wrap each count table in a SummarizedExperiment holding a single assay named
# "counts", then pass it straight to logNormCounts(). The variable names are
# reused, so from here on ref_d_counts / ref2_d_counts are
# SummarizedExperiment objects rather than plain tables.
# NOTE(review): the assay is handed over as a data.frame, not a matrix -
# confirm that the installed Bioconductor versions accept this.
ref_d_counts <- logNormCounts(
  SummarizedExperiment(assays = list(counts = ref_d_counts))
)
ref2_d_counts <- logNormCounts(
  SummarizedExperiment(assays = list(counts = ref2_d_counts))
)

# Run the garbage collector now that the intermediate objects are unreferenced.
gc()

第五步 与metadata一起保存

# Store each metadata table on its object under the name `meta`, then
# serialise the object to disk with saveRDS().
# NOTE(review): the metadata is attached by position - this assumes its rows
# are in the same order as the columns of the object; confirm before relying
# on the labels.

# 10x M1 reference
ref_d_counts$meta <- ref_meta
saveRDS(
  object = ref_d_counts,
  file = "/home/rqzhang/zlliu/R_data/human_M1_10x/hM1.se.rds"
)

# SMART-seq multiple cortical areas reference
ref2_d_counts$meta <- ref2_meta
saveRDS(
  object = ref2_d_counts,
  file = "/home/rqzhang/zlliu/R_data/Human_Multiple_Cortical_Areas_SMART-seq/hmca.rds"
)

医学生