在科研菌公衆号聊天框回複“forest779”即可獲得輸入資料。也可以自己根據表達矩陣與臨床資訊生成,如下:
rm(list = ls())
load("cox_dat.Rdata")
str(dat)
複制
## 'data.frame': 516 obs. of 10 variables:
## $ time : num 47.867 0.533 39.7 24.5 49.767 ...
## $ event : num 0 0 1 1 0 0 0 0 0 0 ...
## $ gender: Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 1 2 2 ...
## $ stage : Factor w/ 4 levels "i","ii","iii",..: 3 3 1 1 2 2 1 1 1 1 ...
## $ age : num 66 77 57 59 57 67 70 52 51 53 ...
## $ miR21 : num 16.7 18 18 17.8 18.4 ...
## $ miR143: num 17 18 17.1 16.5 16.8 ...
## $ miR10b: num 17.7 17.9 17.5 17.1 17.1 ...
## $ miR192: num 15.3 11.6 14.4 14.2 14.6 ...
## $ miR183: num 8.44 11.24 10.53 10.03 8.43 ...
複制
library(survival)
library(survminer)
library(forestplot)
library(stringr)
複制
1.多因素cox回歸
模組化就一句代碼,出森林圖也是一句代碼。
model <- coxph(Surv(time, event) ~., data = dat )
ggforest(model)
複制
這個雖然是ggplot2對象,但是灰色背景去不掉,也沒辦法用正常的ggplot2文法去修改顔色,隻能是導出去再慢慢調咯。
2.美化版森林圖-forestplot
用到一個新的R包,forestplot。
它就沒有ggforest那麼智能了,森林圖展示的内容是需要自己組織的。
2.1學學幫助文檔
看看函數的幫助文檔,最簡單的用法:
row_names <- list(list("test = 1", expression(test >= 2)))
test_data <- data.frame(
coef = c(1.59, 1.24),
low = c(1.4, 0.78),
high = c(1.8, 1.55)
)
forestplot(row_names,
test_data$coef,
test_data$low,
test_data$high,
zero = 1,
cex = 2,
lineheight = "auto",
xlab = "Lab axis txt"
)
複制
最核心的資訊就是HR值和它的置信區間範圍,我們可以從cox模型中提取到圖上的這些資訊。
2.2組織輸入資料
照葫蘆畫瓢開始,準備添加在圖上的label列。
m = summary(model)
colnames(m$coefficients)
複制
## [1] "coef" "exp(coef)" "se(coef)" "z" "Pr(>|z|)"
複制
colnames(m$conf.int)
複制
## [1] "exp(coef)" "exp(-coef)" "lower .95" "upper .95"
複制
#p值改一下格式,加上顯著性
p = ifelse(
m$coefficients[, 5] < 0.001,
"<0.001 ***",
ifelse(
m$coefficients[, 5] < 0.01,
"<0.01 **",
ifelse(
m$coefficients[, 5] < 0.05,
paste(round(m$coefficients[, 5], 3), " *"),
round(m$coefficients[, 5], 3)
)
)
)
#HR和它的置信區間
dat2 = as.data.frame(round(m$conf.int[, c(1, 3, 4)], 2))
dat2 = tibble::rownames_to_column(dat2, var = "Trait")
colnames(dat2)[2:4] = c("HR", "lower", "upper")
#需要在圖上顯示的HR文字和p值
dat2$HR2 = paste0(dat2[, 2], "(", dat2[, 3], "-", dat2[, 4], ")")
dat2$p = p
str(dat2)
複制
## 'data.frame': 10 obs. of 6 variables:
## $ Trait: chr "gendermale" "stageii" "stageiii" "stageiv" ...
## $ HR : num 1.08 1.22 2.5 6.39 1.03 1.41 0.86 0.89 0.82 0.95
## $ lower: num 0.76 0.61 1.6 4.12 1.01 1.14 0.74 0.75 0.76 0.85
## $ upper: num 1.53 2.46 3.89 9.9 1.05 1.75 0.99 1.05 0.9 1.06
## $ HR2 : chr "1.08(0.76-1.53)" "1.22(0.61-2.46)" "2.5(1.6-3.89)" "6.39(4.12-9.9)" ...
## $ p : chr "0.668" "0.574" "<0.001 ***" "<0.001 ***" ...
複制
2.3基礎畫圖
forestplot(
dat2[, c(1, 4, 6)],
mean = dat2[, 2],
lower = dat2[, 3],
upper = dat2[, 4],
zero = 1,
boxsize = 0.4,
col = fpColors(box = '#1075BB', lines = 'black', zero = 'grey'),
lty.ci = "solid",
graph.pos = 2
)
複制
2.4加點細節
主要是分類向量,如這裡的性别和分期,需要畫出reference,那就需要在輸入資料dat2裡添加幾行。
dat2$Trait = str_remove(dat2$Trait, "gender|stage")
ins = function(x) {
c(x, rep(NA, ncol(dat2) - 1))
}
dat2 = rbind(
c("Trait", NA, NA, NA, "HR", "p"),
ins("gender"),
ins("female"),
dat2[1, ],
ins("stage"),
ins("i"),
dat2[2:nrow(dat2), ]
)
for(i in 2:4) {
dat2[, i] = as.numeric(dat2[, i])
}
str(dat2)
複制
## 'data.frame': 15 obs. of 6 variables:
## $ Trait: chr "Trait" "gender" "female" "male" ...
## $ HR : num NA NA NA 1.08 NA NA 1.22 2.5 6.39 1.03 ...
## $ lower: num NA NA NA 0.76 NA NA 0.61 1.6 4.12 1.01 ...
## $ upper: num NA NA NA 1.53 NA NA 2.46 3.89 9.9 1.05 ...
## $ HR2 : chr "HR" NA NA "1.08(0.76-1.53)" ...
## $ p : chr "p" NA NA "0.668" ...
複制
重新畫圖
forestplot(
dat2[, c(1, 5, 6)],
mean = dat2[, 2],
lower = dat2[, 3],
upper = dat2[, 4],
zero = 1,
boxsize = 0.4,
col = fpColors(box = '#1075BB', lines = 'black', zero = 'grey'),
lty.ci = "solid",
graph.pos = 2,
#xticks = F,
is.summary = c(T, T, F, F, T, rep(F, 10)),
align = "l",
hrzl_lines = list(
"1" = gpar(lty=1),
"2" = gpar(lty=1),
"16"= gpar(lty=1)),
colgap = unit(5, 'mm')
)
複制
差不多啦!收工。