“#表1 基线运算 #加载R包 install.packages("tableone") # 正...

Erstellt am: 4. November 2025

Beantwortet mit GPT-5 Pro von Chat01

Frage

“#表1 基线运算
#加载R包
install.packages("tableone")

正确读取 Excel 文件

library(readxl)
df <- read_excel("~/毕业论文/中华高血压杂志V1/基线运算V1.xlsx")

因为"夜间高血压"是分组变量

df$夜间高血压 <- factor(df$夜间高血压,
levels = c(1, 2, 3, 4),
labels = c("1",
"2",
"3",
"4"))

指定分类变量

cat_vars <- c("性别","吸烟","饮酒","抗脂药","抗糖药",
"血脂异常","糖尿病","冠心病","脑血管疾病")

指定连续变量

cont_vars <- c("年龄","身高","体重","BMI",
"全天收缩压","全天舒张压",
"白天收缩压","白天舒张压",
"夜间收缩压","夜间舒张压",
"DDD","空腹血糖","甘油三脂","TyG指数",
"总胆固醇","高密度","低密度",
"肌酐","尿酸")

生成描述性统计表

library(tableone)
table1 <- CreateTableOne(vars = c(cat_vars, cont_vars),
strata = "夜间高血压",
data = df,
factorVars = cat_vars)

输出表格(基线表)

print(table1, showAllLevels = TRUE, quote = FALSE, noSpaces = TRUE)

#转化
print(table1,
showAllLevels = TRUE,
test = TRUE, # 组间检验的 P 值
smd = TRUE, # 标准化差异
contDigits = 2,
pDigits = 3,
quote = FALSE,
noSpaces = TRUE)
tab_mat <- print(table1, showAllLevels=TRUE, test=TRUE, smd=TRUE, printToggle=FALSE)
#word版”将夜间高血压中(1、2、3)三组相加,组成一个总的分组,并做这个总分组的统计描述,在上面分组中进行修改

Gedanke

Adjusting analysis strategy

The user seeks to merge groups 1–3 into a single group and analyze it separately, while maintaining the four-level grouping. They aim for descriptive statistics for the combined group and include an unadjusted hazard ratio.

Adjusting my approach

To generate an aggregated table for groups 1–3, I create a subset table without stratification using the "tableone" package. This yields the overall statistics.

Crafting a Word document

I’m compiling detailed reports for combined groups 1–3 and 4. Ensuring correct data presentation and summarizing findings effectively.

Ensuring comprehensive analysis

I’m crafting a detailed R script to manage data, merge groups, and export to Word with flextable. This includes options to retain both original and combined group tables for thorough comparison.

Reasoned for 3m 51s

Antwort

你要的是:把“夜间高血压”的 1、2、3 三组合并成一个“总组(1–3合并)”,对这个总组做描述统计;并在原有分组结果里“修改/补充”展示。下面给你一套直接可跑的 R 代码,一次性生成三张表,并输出到 Word(原始4组;1–3合并 vs 4 的二组对比;以及“4组表+合并组描述列”)。

说明

  • A:保持你原来的4组表(含P和SMD)。
  • B:新建二分类分组(“1–3合并” vs “4”)做比较(含P和SMD)。
  • C:在4组表里额外插入一列“1–3合并(描述)”(只做描述,不做检验),便于论文展示。
  • Word 导出:用 officer + flextable 输出到你论文目录。

R 代码

r
# Install the required packages first if they are missing:
# install.packages(c("readxl", "tableone", "officer", "flextable"))
library(readxl)
library(tableone)
library(officer)
library(flextable)

## 1) Read the data (same workbook as the original script)
df <- read_excel("~/毕业论文/中华高血压杂志V1/基线运算V1.xlsx")

## 2) Original grouping variable with four levels (1, 2, 3, 4)
df$夜间高血压 <- factor(df$夜间高血压,
  levels = c(1, 2, 3, 4),
  labels = c("1", "2", "3", "4")
)

## 2.1) Merged grouping: levels 1/2/3 become a single level.
## Rows with a missing group stay NA; a bare ifelse(x %in% ...) would put
## them into "4" because NA %in% ... evaluates to FALSE.
df$夜间高血压_合并 <- factor(
  ifelse(
    is.na(df$夜间高血压),
    NA_character_,
    ifelse(df$夜间高血压 %in% c("1", "2", "3"), "1-3合并", "4")
  ),
  levels = c("1-3合并", "4")
)

## 3) Variable lists (unchanged)
cat_vars <- c(
  "性别", "吸烟", "饮酒", "抗脂药", "抗糖药",
  "血脂异常", "糖尿病", "冠心病", "脑血管疾病"
)
cont_vars <- c(
  "年龄", "身高", "体重", "BMI",
  "全天收缩压", "全天舒张压",
  "白天收缩压", "白天舒张压",
  "夜间收缩压", "夜间舒张压",
  "DDD", "空腹血糖", "甘油三脂", "TyG指数",
  "总胆固醇", "高密度", "低密度",
  "肌酐", "尿酸"
)
vars_all <- c(cat_vars, cont_vars)

## Turn the categorical variables into factors on the full data, so that the
## 1-3 subset used in section C keeps every level and therefore produces the
## same table rows as the stratified tables.
for (v in cat_vars) {
  df[[v]] <- factor(df[[v]])
}

## ---- A. Original 4-group baseline table (with P and SMD) ----
tab_4 <- CreateTableOne(
  vars = vars_all,
  strata = "夜间高血压",
  data = df,
  factorVars = cat_vars
)

# printToggle = FALSE returns the character matrix for export / binding
mat_4 <- print(tab_4,
  showAllLevels = TRUE,
  test = TRUE, # between-group P values
  smd = TRUE, # standardized mean differences
  contDigits = 2,
  pDigits = 3,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

## ---- B. Two-group table: "1-3合并" vs "4" (with P and SMD) ----
tab_2 <- CreateTableOne(
  vars = vars_all,
  strata = "夜间高血压_合并",
  data = df,
  factorVars = cat_vars
)

mat_2 <- print(tab_2,
  showAllLevels = TRUE,
  test = TRUE,
  smd = TRUE,
  contDigits = 2,
  pDigits = 3,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

## ---- C. Insert a "1-3合并(描述)" column into the 4-group table ----
# Describe groups 1/2/3 as one unstratified sample (no P / SMD).
# factorVars is omitted on purpose: the categorical variables are already
# factors, and converting them again inside the subset could drop levels
# that do not occur in groups 1-3.
tab_123_overall <- CreateTableOne(
  vars = vars_all,
  data = subset(df, 夜间高血压 %in% c("1", "2", "3"))
)

mat_123_overall <- print(tab_123_overall,
  showAllLevels = TRUE,
  test = FALSE,
  smd = FALSE,
  contDigits = 2,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

# Keep only the statistics column: with showAllLevels = TRUE the printed
# matrix also carries a "level" column, so it has more than one column.
stat_col <- setdiff(colnames(mat_123_overall), "level")
mat_123_overall <- mat_123_overall[, stat_col[1], drop = FALSE]
colnames(mat_123_overall) <- "1-3合并(描述)"

# Align by position, not by row name: level rows have the empty row name "",
# and a character subscript "" never matches, which raises
# "subscript out of bounds". Verify that both tables share one row layout.
if (!identical(rownames(mat_123_overall), rownames(mat_4))) {
  stop("合并组描述表与四组表的行结构不一致,无法拼接", call. = FALSE)
}

# Place the new column between the group columns and the P / test / SMD columns
idx_right <- which(colnames(mat_4) %in% c("p", "test", "SMD"))
if (length(idx_right) > 0) {
  left_4 <- mat_4[, -idx_right, drop = FALSE]
  right_4 <- mat_4[, idx_right, drop = FALSE]
  mat_4_plus <- cbind(left_4, mat_123_overall, right_4)
} else {
  left_4 <- mat_4
  right_4 <- NULL
  mat_4_plus <- cbind(left_4, mat_123_overall)
}

## ---- D. Export to Word (three tables, one section each) ----
out_dir <- "~/毕业论文/中华高血压杂志V1"
out_file <- file.path(out_dir, "基线表_含合并组.docx")

doc <- read_docx()

# Table A: original 4 groups
doc <- body_add_par(doc, "原始4组基线表", style = "heading 1")
ftA <- flextable(data.frame(变量 = rownames(mat_4), mat_4, check.names = FALSE))
ftA <- autofit(ftA)
doc <- body_add_flextable(doc, ftA)
doc <- body_add_par(doc, "")

# Table B: merged 1-3 vs 4 (two-group comparison)
doc <- body_add_par(doc, "1–3合并 vs 4(二组比较)", style = "heading 1")
ftB <- flextable(data.frame(变量 = rownames(mat_2), mat_2, check.names = FALSE))
ftB <- autofit(ftB)
doc <- body_add_flextable(doc, ftB)
doc <- body_add_par(doc, "")

# Table C: 4-group table plus the descriptive merged column
doc <- body_add_par(doc, "四组表 + “1–3合并(描述)”列", style = "heading 1")
ftC <- flextable(
  data.frame(变量 = rownames(mat_4_plus), mat_4_plus, check.names = FALSE)
)
ftC <- autofit(ftC)
doc <- body_add_flextable(doc, ftC)

print(doc, target = out_file)
message("已导出:", out_file)

小贴士

  • 如果你只想用合并后的分组来替换原分组,那么后续分析里把 strata = "夜间高血压" 改成 strata = "夜间高血压_合并" 即可。
  • 若需要中位数[IQR]等非正态统计,可在 print() 里设置 nonnormal = c("变量A","变量B", ...);对应的非正态检验由 CreateTableOne() 的 testNonNormal 参数决定(默认 kruskal.test),不需要在 print() 里再写 testNonNormal = TRUE。
  • flextable 输出后可以在 Word 中调整边框、字体与列宽,以符合期刊版式。

把这段代码直接放到你现有脚本的读取数据之后即可运行;生成的 Word 文件会保存在 ~/毕业论文/中华高血压杂志V1/基线表_含合并组.docx。如果还想把这三张表拆分为三个不同的 Word 文件,也可以帮你改成分别导出。

Frage

“# 如未安装请先安装:

install.packages(c("readxl","tableone","officer","flextable"))

library(readxl)
library(tableone)
library(officer)
library(flextable)

1) 读入数据(与你原代码一致)

df <- read_excel("~/毕业论文/中华高血压杂志V1/基线运算V1.xlsx")

2) 设置原始分组(1,2,3,4)

df$夜间高血压 <- factor(df$夜间高血压,
levels = c(1, 2, 3, 4),
labels = c("1","2","3","4"))

2.1) 构造新的“合并分组”:把 1/2/3 合并为一个层级

df$夜间高血压_合并 <- factor( ifelse(df$夜间高血压 %in% c("1","2","3"), "1-3合并", "4"),
levels = c("1-3合并","4")
)

3) 变量清单(与你一致)

cat_vars <- c("性别","吸烟","饮酒","抗脂药","抗糖药",
"血脂异常","糖尿病","冠心病","脑血管疾病")
cont_vars <- c("年龄","身高","体重","BMI",
"全天收缩压","全天舒张压",
"白天收缩压","白天舒张压",
"夜间收缩压","夜间舒张压",
"DDD","空腹血糖","甘油三脂","TyG指数",
"总胆固醇","高密度","低密度",
"肌酐","尿酸")
vars_all <- c(cat_vars, cont_vars)

---- A. 原始4组基线表(含 P 与 SMD) ----

tab_4 <- CreateTableOne(vars = vars_all,
strata = "夜间高血压",
data = df,
factorVars = cat_vars)

mat_4 <- print(tab_4,
showAllLevels = TRUE,
test = TRUE, # 组间检验 P 值
smd = TRUE, # 标准化差异
contDigits = 2,
pDigits = 3,
quote = FALSE,
noSpaces = TRUE,
printToggle = FALSE) # 返回矩阵,便于后续导出/拼接

---- B. 二组表:“1-3合并” vs “4”(含 P 与 SMD) ----

tab_2 <- CreateTableOne(vars = vars_all,
strata = "夜间高血压_合并",
data = df,
factorVars = cat_vars)

mat_2 <- print(tab_2,
showAllLevels = TRUE,
test = TRUE,
smd = TRUE,
contDigits = 2,
pDigits = 3,
quote = FALSE,
noSpaces = TRUE,
printToggle = FALSE)

---- C. 在4组表中插入“1-3合并(描述)”这一列 ----

先对 1/2/3 这三组整体做一次“总体描述”(不分层、无P/SMD)

tab_123_overall <- CreateTableOne(vars = vars_all,
data = subset(df, 夜间高血压 %in% c("1","2","3")),
factorVars = cat_vars)

mat_123_overall <- print(tab_123_overall,
showAllLevels = TRUE,
test = FALSE,
smd = FALSE,
contDigits = 2,
quote = FALSE,
noSpaces = TRUE,
printToggle = FALSE)

行对齐并改列名

mat_123_overall <- mat_123_overall[rownames(mat_4), , drop = FALSE]
colnames(mat_123_overall) <- "1-3合并(描述)"

把“1-3合并(描述)”插到 4 组列 与 P/SMD 列 之间

idx_right <- which(colnames(mat_4) %in% c("p","test","SMD"))
left_4 <- if (length(idx_right)) mat_4[, -idx_right, drop=FALSE] else mat_4
right_4 <- if (length(idx_right)) mat_4[, idx_right, drop=FALSE] else NULL

mat_4_plus <- if (is.null(right_4)) {
cbind(left_4, mat_123_overall)
} else {
cbind(left_4, mat_123_overall, right_4)
}

---- D. 导出到 Word(3张表各一页/一段) ----

out_dir <- "~/毕业论文/中华高血压杂志V1"
out_file <- file.path(out_dir, "基线表_含合并组.docx")

doc <- read_docx()

表A:原始4组

doc <- body_add_par(doc, "原始4组基线表", style = "heading 1")
ftA <- flextable(data.frame(变量 = rownames(mat_4), mat_4, check.names = FALSE))
ftA <- autofit(ftA)
doc <- body_add_flextable(doc, ftA)
doc <- body_add_par(doc, "")

表B:1–3合并 vs 4(二组比较)

doc <- body_add_par(doc, "1–3合并 vs 4(二组比较)", style = "heading 1")
ftB <- flextable(data.frame(变量 = rownames(mat_2), mat_2, check.names = FALSE))
ftB <- autofit(ftB)
doc <- body_add_flextable(doc, ftB)
doc <- body_add_par(doc, "")

表C:四组表 + “1–3合并(描述)”列

doc <- body_add_par(doc, "四组表 + “1–3合并(描述)”列", style = "heading 1")
ftC <- flextable(data.frame(变量 = rownames(mat_4_plus), mat_4_plus, check.names = FALSE))
ftC <- autofit(ftC)
doc <- body_add_flextable(doc, ftC)

print(doc, target = out_file)
message("已导出:", out_file)”错误于mat_123_overall[rownames(mat_4), , drop = FALSE]: 下标出界

Gedanke

Identifying row mismatches

I’m examining row name inconsistencies in matrices by analyzing rows with mismatched names and assessing OpenAI's policies. Hm, I wonder if missing names in matrices trigger subscript out of bounds errors.

Ensuring alignment

I’m considering approaches to align matrices by intersecting row names and adding missing ones, avoiding subscript out of bounds errors and maintaining consistency in printed outputs.

Ensuring matrix alignment

I’m detailing a method to align matrices by intersecting row names, padding with blanks for missing ones, and adjusting categorical variables to maintain consistency and prevent errors.

Enhancing matrix alignment

I’m proposing a robust matrix alignment method using CreateTableOne with AddOverall to ensure consistent row names across subsets, likely resolving the subscript out of bounds error.

Fixing alignment

I’m mapping out solutions to address matrix alignment issues. This involves aligning matrices, ensuring consistent categorical variable handling, revealing missing rows, and providing amendments for the erroneous code segment.

Working through the issue

I’m detailing the recommended fix by converting categorical variables to factors with consistent levels, suggesting safe alignment for matrix rows, and ensuring the updated code aligns with the user's requirements.

Reasoned for 4m 1s

Antwort

你这个错误来自这一句:

r
mat_123_overall <- mat_123_overall[rownames(mat_4), , drop = FALSE]

原因:mat_123_overall 里缺了 mat_4 中的一些行名(通常是某些分类变量的某个水平在 1–3 组中完全没有出现,CreateTableOne 在“overall(不分层)”时就不生成该行),所以按 rownames(mat_4) 去取行会“下标出界”。另外,showAllLevels = TRUE 时各水平所在行的行名是空字符串 "",而 R 按字符下标取行时 "" 不会匹配任何行名,同样会触发该错误。


最稳妥的修正(推荐)

不要用不分层的 overall 去拼列,直接从你已经算好的 二组表 mat_2 里抽取“1–3合并”那一列作为描述列。这样所有分类水平都会出现,行名与 mat_4 能对齐,不会越界。

把你代码中 C 段替换为下面这段即可:

r
## ---- C. Insert a "1-3合并(描述)" column into the 4-group table ----
## (the column is taken from the two-group matrix mat_2)

# Select the merged-group column by its name. With showAllLevels = TRUE the
# matrix also has a "level" column, so "first column that is not 4/p/test/SMD"
# would return the level labels instead of the statistics.
if (!"1-3合并" %in% colnames(mat_2)) {
  stop("mat_2 中找不到“1-3合并”列", call. = FALSE)
}
col_merged <- mat_2[, "1-3合并", drop = FALSE]
colnames(col_merged) <- "1-3合并(描述)"

# Align by position, not by row name: level rows have the empty row name "",
# and a character subscript "" never matches, which raises
# "subscript out of bounds". Both matrices must come from the same variables
# and print() options; fail with a clear message if they do not.
if (!identical(rownames(col_merged), rownames(mat_4))) {
  stop("mat_2 与 mat_4 的行结构不一致,无法按位置拼接", call. = FALSE)
}

# Place the new column between the group columns and the P / test / SMD columns
idx_right <- which(colnames(mat_4) %in% c("p", "test", "SMD"))
if (length(idx_right) > 0) {
  left_4 <- mat_4[, -idx_right, drop = FALSE]
  right_4 <- mat_4[, idx_right, drop = FALSE]
  mat_4_plus <- cbind(left_4, col_merged, right_4)
} else {
  left_4 <- mat_4
  right_4 <- NULL
  mat_4_plus <- cbind(left_4, col_merged)
}

这样做的好处:二组表 mat_2 与四组表 mat_4 的行名体系一致(同一套变量与水平),因此不会再出现“下标出界”。


如果你更想保留“overall(1–3)”的做法(备选)

那就需要保证 1–3 子集的所有分类变量水平与全样本完全一致,并在拼接前核对两张表的行结构是否一致:

r
# 1) Give the categorical variables of the 1-3 subset the same levels as the
#    full sample. levels(factor(...)) also works when the column in df is not
#    a factor yet; levels() on a plain vector is NULL and would turn every
#    value into NA.
df123 <- subset(df, 夜间高血压 %in% c("1", "2", "3"))
for (v in cat_vars) {
  df123[[v]] <- factor(df123[[v]], levels = levels(factor(df[[v]])))
}

# factorVars is omitted on purpose: the variables are already factors, and
# converting them again could drop the levels that are unused in the subset.
tab_123_overall <- CreateTableOne(vars = vars_all, data = df123)

mat_123_overall <- print(tab_123_overall,
  showAllLevels = TRUE,
  test = FALSE,
  smd = FALSE,
  contDigits = 2,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

# 2) Keep only the statistics column: with showAllLevels = TRUE the printed
#    matrix also carries a "level" column.
stat_col <- setdiff(colnames(mat_123_overall), "level")
mat_123_overall <- mat_123_overall[, stat_col[1], drop = FALSE]
colnames(mat_123_overall) <- "1-3合并(描述)"

# 3) Align by position, not by row name: level rows have the empty row name
#    "", and a character subscript "" never matches, which raises
#    "subscript out of bounds". Verify that the row layouts agree.
if (!identical(rownames(mat_123_overall), rownames(mat_4))) {
  stop("合并组描述表与四组表的行结构不一致,无法拼接", call. = FALSE)
}

# 4) Bind with the 4-group table: new column goes before P / test / SMD
idx_right <- which(colnames(mat_4) %in% c("p", "test", "SMD"))
if (length(idx_right) > 0) {
  left_4 <- mat_4[, -idx_right, drop = FALSE]
  right_4 <- mat_4[, idx_right, drop = FALSE]
  mat_4_plus <- cbind(left_4, mat_123_overall, right_4)
} else {
  left_4 <- mat_4
  right_4 <- NULL
  mat_4_plus <- cbind(left_4, mat_123_overall)
}

快速自检(可选)

若想确认行名不一致来自哪里,跑这两行就能定位缺的项目:

r
# Rows of the 4-group table that are missing from the two-group table
setdiff(rownames(mat_4), rownames(mat_2))
# Or: rows missing from the overall (groups 1-3) table
setdiff(rownames(mat_4), rownames(mat_123_overall))

按以上任一方案修改后,后面的 Word 导出 部分无需改动,可以继续使用你现有的 D 段代码生成文档。

Frage

“#表1 基线运算
#加载R包
install.packages("tableone")

正确读取 Excel 文件

library(readxl)
df <- read_excel("~/毕业论文/中华高血压杂志V1/基线运算V1.xlsx")

因为"夜间高血压"是分组变量

df$夜间高血压 <- factor(df$夜间高血压,
levels = c(1, 2, 3, 4),
labels = c("1",
"2",
"3",
"4"))

指定分类变量

cat_vars <- c("性别","吸烟","饮酒","抗脂药","抗糖药",
"血脂异常","糖尿病","冠心病","脑血管疾病")

指定连续变量

cont_vars <- c("年龄","身高","体重","BMI",
"全天收缩压","全天舒张压",
"白天收缩压","白天舒张压",
"夜间收缩压","夜间舒张压",
"DDD","空腹血糖","甘油三脂","TyG指数",
"总胆固醇","高密度","低密度",
"肌酐","尿酸")

生成描述性统计表

library(tableone)
table1 <- CreateTableOne(vars = c(cat_vars, cont_vars),
strata = "夜间高血压",
data = df,
factorVars = cat_vars)

输出表格(基线表)

print(table1, showAllLevels = TRUE, quote = FALSE, noSpaces = TRUE)

#转化
print(table1,
showAllLevels = TRUE,
test = TRUE, # 组间检验的 P 值
smd = TRUE, # 标准化差异
contDigits = 2,
pDigits = 3,
quote = FALSE,
noSpaces = TRUE)
tab_mat <- print(table1, showAllLevels=TRUE, test=TRUE, smd=TRUE, printToggle=FALSE)”计算统计量,在上述代码中进行修改

Gedanke

Mapping adjustments

I’m thinking through the modifications needed: computing means ± sd for continuous, median [IQR] for non-normal, adding geometric means, and combining groups. Hm, let me see.

Aligning and enhancing

I’m noting that the code should include converting categorical variables to factors, adding automatic non-normality detection, specifying p-values, and ensuring columns show overall statistics if needed.

Utilizing tableone features

I'm pulling together insights on using addOverall for overall column inclusion, ensuring Fisher's exact test for small counts, and maintaining minMax display for continuous variable ranges.

Integrating statistical features

Detaching the influence of external factors, I’m adding normalization checks, Fisher exact tests, and customizing display details. Progress is steady and structured.

Transforming and refining

I’m updating the code to convert categorical variables, incorporate normality tests, add overall sections, and outline an optional merged group calculation. Progressing through each step methodically feels satisfying.

Finalizing and ensuring

I’m pulling together descriptive statistics, addressing edge cases like non-numeric TyG indices, and ensuring categorical variables are factors. The modifications are integrated with clear explanations and a focus on precision.

Finalizing the approach

I’ve decided to simplify the default statistical test, exclude unnecessary SMD calculations for more than two groups, and carefully integrate modifications with detailed comments for clarity.

Reasoned for 7m 22s

Antwort

下面是在你现有代码基础上做的“就地修改”,自动计算统计量并选择合适的检验方式:

  • 连续变量:自动做正态性检验(Shapiro–Wilk),正态显示为 均值±SD 并用 单因素方差分析;非正态显示为 中位数[IQR] 并用 Kruskal–Wallis(通过 print() 的 nonnormal 参数实现;Kruskal–Wallis 是 CreateTableOne() 中 testNonNormal 的默认检验)。
  • 分类变量:自动判断是否需要 Fisher 精确检验(当任何期望频数 < 5 或卡方不可用时),否则用 卡方检验(通过 exact= 指定)。
  • 仍保留 P 值和 SMD(注意:SMD 在多组分层时的含义有限,如果需要标准化差异,建议做二组比较)。

直接把你代码中“# 生成描述性统计表”这一段替换为下面整段即可(前面的读取数据与变量列表保持不变)。

r
# =========================
# Build the descriptive table (tests are chosen automatically)
# =========================
library(tableone)

# 1) Clean-up: categorical variables become factors, continuous ones numeric.
#    Values that cannot be converted to a number become NA.
cat_vars <- intersect(cat_vars, names(df))
cont_vars <- intersect(cont_vars, names(df))
for (v in cat_vars) {
  if (!is.factor(df[[v]])) df[[v]] <- factor(df[[v]])
}
for (v in cont_vars) {
  if (!is.numeric(df[[v]])) df[[v]] <- suppressWarnings(as.numeric(df[[v]]))
}

# 2) Flag non-normal continuous variables.
# is_nonnormal(x, g): TRUE when the Shapiro-Wilk p value is below 0.05 in at
# least one group of g. Groups with fewer than 3 finite values, or where the
# test fails, are skipped.
is_nonnormal <- function(x, g) {
  p_by_group <- vapply(split(x, g), function(z) {
    z <- z[is.finite(z)]
    n <- length(z)
    if (n < 3) {
      return(NA_real_)
    }
    # shapiro.test() accepts at most 5000 observations
    if (n > 5000) z <- sample(z, 5000)
    tryCatch(shapiro.test(z)$p.value, error = function(e) NA_real_)
  }, numeric(1))
  any(p_by_group < 0.05, na.rm = TRUE)
}
# vapply keeps the index logical even when cont_vars is empty
nonnormal_vars <- cont_vars[vapply(
  cont_vars,
  function(v) is_nonnormal(df[[v]], df$夜间高血压),
  logical(1)
)]

# 3) Decide which categorical variables need Fisher's exact test:
#    any expected count below 5, or the chi-squared test raises an error.
need_fisher <- vapply(cat_vars, function(v) {
  tab <- table(df[[v]], df$夜间高血压, useNA = "no")
  if (nrow(tab) < 2 || ncol(tab) < 2) {
    return(FALSE) # nothing to test
  }
  # The small-expected-count warning is exactly what is checked below
  chi <- tryCatch(
    suppressWarnings(chisq.test(tab, correct = FALSE)),
    error = function(e) NULL
  )
  if (is.null(chi)) {
    return(TRUE)
  }
  any(chi$expected < 5)
}, logical(1))
exact_vars <- names(need_fisher)[need_fisher]

# 4) Build Table 1 (with P and SMD)
table1 <- CreateTableOne(
  vars = c(cat_vars, cont_vars),
  strata = "夜间高血压",
  data = df,
  factorVars = cat_vars
)

# nonnormal: shown as median [IQR] and tested with the non-normal test set in
#   CreateTableOne() (testNonNormal, default kruskal.test). testNonNormal is
#   not a print() argument, so it is not passed here.
# exact: tested with Fisher's exact test; the rest use the chi-squared test.
tab_mat <- print(table1,
  showAllLevels = TRUE,
  test = TRUE,
  nonnormal = nonnormal_vars,
  exact = exact_vars,
  smd = TRUE,
  contDigits = 2,
  pDigits = 3,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

# Show the result in the console
print(tab_mat)

# (Optional) export to Word
# install.packages(c("officer", "flextable")) # if not installed yet
library(officer)
library(flextable)
out_file <- "~/毕业论文/中华高血压杂志V1/表1_基线(自动检验).docx"
doc <- read_docx()
ft <- flextable(data.frame(变量 = rownames(tab_mat), tab_mat, check.names = FALSE))
ft <- autofit(ft)
doc <- body_add_par(
  doc,
  "表1 基线特征(正态=均值±SD;非正态=中位数[IQR];分类:卡方/Fisher 自动选择)",
  style = "heading 1"
)
doc <- body_add_flextable(doc, ft)
print(doc, target = out_file)
message("已导出:", out_file)

说明与小贴士

  • 正态/非正态判断:用每个分组内的 Shapiro–Wilk(n≥3);任一组不满足正态则把该变量放入 nonnormal_vars,tableone::print() 会自动以 中位数[IQR] 展示并使用 Kruskal–Wallis。
  • Fisher 自动选择:对每个分类变量按分组构建列联表;若期望频数 < 5 或卡方不可用,则把该变量名放入 exact_vars,打印时对它们用 Fisher。
  • SMD:tableone 在多组分层时提供的 SMD 仅作粗略参考;若论文需要严格的标准化差异,建议做二组比较(例如把 1–3 合并对 4),或单独计算效应量(如 η²、Cramér’s V)。
  • 如果出现“某变量不存在”之类的报错,检查变量名是否与 Excel 表头完全一致;代码里已用 intersect() 自动忽略不存在的变量名。

如果你还需要把 1–3 组合并 vs 4 再做一份二组比较(更利于展示 SMD),我也可以把相应的两行扩展代码补上。

Frage

把 1–3 组合并 vs 4

Gedanke

Constructing the merged factor

I’m thinking through merging groups 1-3 versus group 4, standardizing variables, and computing descriptive statistics and group comparisons. This involves creating the merged variable and using statistical tests like Shapiro and exact fisher.

Adjusting approach

I’m refining the strategy to handle nonnormal variables, ensuring accurate categorization, and potentially adding an overall column for a comprehensive view.

Crafting the code

I'm piecing together an R code snippet to append to the script, ensuring it handles nonnormal variables, includes P-values and SMD, and saves an output Word document.

Reasoned for 3m 51s

Antwort

好的!下面这段代码把原 1、2、3 组合并为“1–3合并”,与 4 组进行二组比较,并自动选择合适的检验方式(连续变量:正态→t 检验/均值±SD;非正态→Wilcoxon/中位数[IQR];分类变量:卡方或 Fisher)。把它接在你现有脚本后面(读入数据与变量列表已存在)即可运行。

r
## === Two-level grouping: merged 1-3 vs 4 ===
## Rows with a missing group stay NA; a bare ifelse(x %in% ...) would put
## them into "4" because NA %in% ... evaluates to FALSE.
df$夜间高血压_合并 <- factor(
  ifelse(
    is.na(df$夜间高血压),
    NA_character_,
    ifelse(df$夜间高血压 %in% c("1", "2", "3"), "1-3合并", "4")
  ),
  levels = c("1-3合并", "4")
)

## === Type clean-up (Excel columns may have been read as character) ===
cat_vars <- intersect(cat_vars, names(df))
cont_vars <- intersect(cont_vars, names(df))
for (v in cat_vars) {
  if (!is.factor(df[[v]])) df[[v]] <- factor(df[[v]])
}
for (v in cont_vars) {
  if (!is.numeric(df[[v]])) df[[v]] <- suppressWarnings(as.numeric(df[[v]]))
}

## === Continuous variables: flag non-normal ones per group ===
# is_nonnormal(x, g): TRUE when the Shapiro-Wilk p value is below 0.05 in at
# least one group of g. Groups with fewer than 3 finite values, or where the
# test fails, are skipped.
is_nonnormal <- function(x, g) {
  p_by_group <- vapply(split(x, g), function(z) {
    z <- z[is.finite(z)]
    n <- length(z)
    if (n < 3) {
      return(NA_real_)
    }
    # shapiro.test() accepts at most 5000 observations
    if (n > 5000) z <- sample(z, 5000)
    tryCatch(shapiro.test(z)$p.value, error = function(e) NA_real_)
  }, numeric(1))
  any(p_by_group < 0.05, na.rm = TRUE)
}
# vapply keeps the index logical even when cont_vars is empty
nonnormal_vars_2 <- cont_vars[vapply(
  cont_vars,
  function(v) is_nonnormal(df[[v]], df$夜间高血压_合并),
  logical(1)
)]

## === Categorical variables: expected count < 5 or chi-squared error -> Fisher ===
need_fisher_2 <- vapply(cat_vars, function(v) {
  tab <- table(df[[v]], df$夜间高血压_合并, useNA = "no")
  if (nrow(tab) < 2 || ncol(tab) < 2) {
    return(FALSE) # nothing to test
  }
  # The small-expected-count warning is exactly what is checked below
  chi <- tryCatch(
    suppressWarnings(chisq.test(tab, correct = FALSE)),
    error = function(e) NULL
  )
  if (is.null(chi)) {
    return(TRUE)
  }
  any(chi$expected < 5)
}, logical(1))
exact_vars_2 <- names(need_fisher_2)[need_fisher_2]

## === Two-group table: merged 1-3 vs 4 (with P and SMD) ===
library(tableone)
tab_2 <- CreateTableOne(
  vars = c(cat_vars, cont_vars),
  strata = "夜间高血压_合并",
  data = df,
  factorVars = cat_vars
)

# nonnormal: shown as median [IQR] and tested with the non-normal test set in
#   CreateTableOne() (testNonNormal, default kruskal.test). testNonNormal is
#   not a print() argument, so it is not passed here.
# exact: tested with Fisher's exact test where flagged above.
tab2_mat <- print(tab_2,
  showAllLevels = TRUE,
  test = TRUE,
  nonnormal = nonnormal_vars_2,
  exact = exact_vars_2,
  smd = TRUE, # SMD is directly interpretable with two groups
  contDigits = 2,
  pDigits = 3,
  quote = FALSE,
  noSpaces = TRUE,
  printToggle = FALSE
)

# Show the result in the console
print(tab2_mat)

## === (Optional) export to Word ===
# install.packages(c("officer", "flextable")) # if not installed yet
library(officer)
library(flextable)
out_file2 <- "~/毕业论文/中华高血压杂志V1/表1_1-3合并_vs_4.docx"
doc2 <- read_docx()
doc2 <- body_add_par(doc2, "表1:1–3合并 vs 4(二组比较)", style = "heading 1")
ft2 <- flextable(
  data.frame(变量 = rownames(tab2_mat), tab2_mat, check.names = FALSE)
)
ft2 <- autofit(ft2)
doc2 <- body_add_flextable(doc2, ft2)
print(doc2, target = out_file2)
message("已导出:", out_file2)

使用说明

  • 这段代码不改变你原来的 4 组表,只是新增一张“1–3合并 vs 4”的二组比较表。
  • 若你只想在后续分析里用合并分组,把 strata = "夜间高血压" 改成 strata = "夜间高血压_合并" 即可。
  • 二组比较的 SMD 更具可解释性;连续变量的 SMD 基于两组的合并标准差,分类变量为比例差的标准化。
  • 若你更倾向于固定显示某些变量为中位数[IQR](不做正态检验),可把这些变量名直接填进 nonnormal = c("变量A","变量B", ...)

需要把这张二组表再与“四组表”并排展示、或在四组表中插入“1–3合并(描述)”那一列,我也可以给你对应的拼接代码。

Diesen Q&A teilen