有序分类Logistic回归

发布于 2022-07-15  116 次阅读


Logistic回归的假设

  • 假设1:因变量唯一,且为有序多分类变量
  • 假设2:存在一个或多个自变量,可为连续、有序多分类或无序分类变量。
  • 假设3:自变量之间无多重共线性。
  • 假设4:模型满足“比例优势”假设。

通过conda安装纯净环境的Logistic分析包

  • conda create -n logistic -c conda-forge r-base=4.1.3
  • conda activate logistic
  • conda install -c conda-forge r-tidyverse=1.3.1 -y
  • conda install -c conda-forge r-irkernel -y
  • conda install -c conda-forge r-foreign -y
  • conda install -c conda-forge r-mass -y
  • conda install -c conda-forge r-hmisc -y
  • conda install -c conda-forge r-reshape2 -y
  • conda install -c r r-car -y
  • Rscript -e "IRkernel::installspec(name='logistic', displayname='r-logistic')"

准备数据

group <- t(readRDS('prad_tpm_Cu_death.rds'))
clinical <- read.csv('TCGA-PRAD_clinical.csv', row.names = 'bcr_patient_barcode')
f_TCGA_gleason_grade <- function(primary_gleason_grade, secondary_gleason_grade){
    primary_gleason_grade <- as.numeric(unlist(data.frame(strsplit(primary_gleason_grade, ' '))[2,]))
    secondary_gleason_grade <- as.numeric(unlist(data.frame(strsplit(secondary_gleason_grade, ' '))[2,]))
    primary_gleason_grade + secondary_gleason_grade
}
clinical[['gleason']] <- f_TCGA_gleason_grade(clinical$primary_gleason_grade, clinical$secondary_gleason_grade)
mergeID <- intersect(rownames(clinical), rownames(group))
df <- cbind(group[mergeID,], clinical[mergeID, c('gleason', 'dcf_time', 'dcf_status', 'os_time', 'os_status')])
df[,'dcf_status'] = ifelse(df[,'dcf_status']==1,0,1)
df

多重共线性检测

  • qr(as.matrix(df[1:10]))$rank == 10
  • kappa(cor(as.matrix(df[1:10])), exact= TRUE) < 100
  • library(car)
  • vif(lm.sol)

Logistic回归

require(foreign)
require(ggplot2)
require(MASS)
require(Hmisc)
require(reshape2)
lmf <- paste0("factor(gleason)~", paste(colnames(df)[1:10], collapse = '+'))
lmf
m <- polr(formula(lmf), data = df, Hess = TRUE)
m
drop1(m,test="Chi") 
(ci <- confint(m))
exp(cbind(OR = coef(m), ci))
res <- as.data.frame(exp(cbind(OR = coef(m), ci)))
res[['Pvalue']] <- drop1(m,test="Chi")[rownames(res),'Pr(>Chi)']
res[['VarName']] <- rownames(res)
colnames(res)[1:3] <- c('mean', 'lower', 'upper')
res
saveRDS(res, 'Logistic.rds')

绘制森林图

绘制函数来源

df <- readRDS('Logistic.rds')
options(repr.plot.width=8, repr.plot.height=6)
f_forestplot(df, zero = 1)

医学生