{gtsummary}

What is {gtsummary}?

  • Create tables that are publication-ready
  • Highly customizable
  • Descriptive tables, regression tables, etc.

gtsummary::tbl_summary()

library(gtsummary)

tbl_summary(
  nlsy,
  by = sex_cat,
  include = c(sex_cat, race_eth_cat, region_cat,
              eyesight_cat, glasses, age_bir))
Characteristic Male, N = 6,4031 Female, N = 6,2831
race_eth_cat
    Hispanic 1,000 (16%) 1,002 (16%)
    Black 1,613 (25%) 1,561 (25%)
    Non-Black, Non-Hispanic 3,790 (59%) 3,720 (59%)
region_cat
    Northeast 1,296 (21%) 1,254 (20%)
    North Central 1,488 (24%) 1,446 (23%)
    South 2,251 (36%) 2,317 (38%)
    West 1,253 (20%) 1,142 (19%)
    Unknown 115 124
eyesight_cat
    Excellent 1,582 (38%) 1,334 (31%)
    Very good 1,470 (35%) 1,500 (35%)
    Good 792 (19%) 1,002 (23%)
    Fair 267 (6.4%) 365 (8.5%)
    Poor 47 (1.1%) 85 (2.0%)
    Unknown 2,245 1,997
glasses 1,566 (38%) 2,328 (54%)
    Unknown 2,241 1,995
age_bir 25 (21, 29) 22 (19, 27)
    Unknown 3,652 3,091
1 n (%); Median (IQR)

tbl_summary(
  nlsy,
  by = sex_cat,
  include = c(sex_cat, race_eth_cat, region_cat,
              eyesight_cat, glasses, age_bir),
  label = list(
    race_eth_cat ~ "Race/ethnicity",
    region_cat ~ "Region",
    eyesight_cat ~ "Eyesight",
    glasses ~ "Wears glasses",
    age_bir ~ "Age at first birth"
  ),
  missing_text = "Missing")
Characteristic Male, N = 6,4031 Female, N = 6,2831
Race/ethnicity
    Hispanic 1,000 (16%) 1,002 (16%)
    Black 1,613 (25%) 1,561 (25%)
    Non-Black, Non-Hispanic 3,790 (59%) 3,720 (59%)
Region
    Northeast 1,296 (21%) 1,254 (20%)
    North Central 1,488 (24%) 1,446 (23%)
    South 2,251 (36%) 2,317 (38%)
    West 1,253 (20%) 1,142 (19%)
    Missing 115 124
Eyesight
    Excellent 1,582 (38%) 1,334 (31%)
    Very good 1,470 (35%) 1,500 (35%)
    Good 792 (19%) 1,002 (23%)
    Fair 267 (6.4%) 365 (8.5%)
    Poor 47 (1.1%) 85 (2.0%)
    Missing 2,245 1,997
Wears glasses 1,566 (38%) 2,328 (54%)
    Missing 2,241 1,995
Age at first birth 25 (21, 29) 22 (19, 27)
    Missing 3,652 3,091
1 n (%); Median (IQR)

tbl_summary(
  nlsy,
  by = sex_cat,
  include = c(sex_cat, race_eth_cat,
              eyesight_cat, glasses, age_bir),
  label = list(
    race_eth_cat ~ "Race/ethnicity",
    eyesight_cat ~ "Eyesight",
    glasses ~ "Wears glasses",
    age_bir ~ "Age at first birth"
  ),
  missing_text = "Missing") |> 
  add_p(test = list(all_continuous() ~ "t.test", 
                    all_categorical() ~ "chisq.test")) |> 
  add_overall(col_label = "**Total**") |> 
  bold_labels() |> 
  modify_footnote(update = everything() ~ NA) |> 
  modify_header(label = "**Variable**", p.value = "**P**")
Variable Overall, N = 12,686 Male, N = 6,403 Female, N = 6,283 P
Race/ethnicity 0.8
    Hispanic 2,002 (16%) 1,000 (16%) 1,002 (16%)
    Black 3,174 (25%) 1,613 (25%) 1,561 (25%)
    Non-Black, Non-Hispanic 7,510 (59%) 3,790 (59%) 3,720 (59%)
Eyesight <0.001
    Excellent 2,916 (35%) 1,582 (38%) 1,334 (31%)
    Very good 2,970 (35%) 1,470 (35%) 1,500 (35%)
    Good 1,794 (21%) 792 (19%) 1,002 (23%)
    Fair 632 (7.5%) 267 (6.4%) 365 (8.5%)
    Poor 132 (1.6%) 47 (1.1%) 85 (2.0%)
    Missing 4,242 2,245 1,997
Wears glasses 3,894 (46%) 1,566 (38%) 2,328 (54%) <0.001
    Missing 4,236 2,241 1,995
Age at first birth 23 (20, 28) 25 (21, 29) 22 (19, 27) <0.001
    Missing 6,743 3,652 3,091

tbl_summary()

  • Incredibly customizeable
  • Really helpful with Table 1
  • I often just view in the web browser and copy and paste into a Word document
  • Can also be used within quarto/R Markdown1
  • If output is Word, I use as_flex_table()
  • Make even more customizeable with as_gt()
    • Then can output to Word with gt::as_word()

Univariate regressions

Fit a series of univariate regressions of income on other variables.

tbl_uvregression(
  nlsy, 
  y = income,
  include = c(sex_cat, race_eth_cat,
              eyesight_cat, income, age_bir),
  method = lm)
Characteristic N Beta 95% CI1 p-value
age_bir 4,773 595 538, 652 <0.001
sex_cat 10,195
    Male
    Female -358 -844, 128 0.15
race_eth_cat 10,195
    Hispanic
    Black -1,747 -2,507, -988 <0.001
    Non-Black, Non-Hispanic 3,863 3,195, 4,530 <0.001
eyesight_cat 6,789
    Excellent
    Very good -578 -1,319, 162 0.13
    Good -1,863 -2,719, -1,006 <0.001
    Fair -4,674 -5,910, -3,439 <0.001
    Poor -6,647 -9,154, -4,140 <0.001
1 CI = Confidence Interval

Can also do logistic regression

tbl_uvregression(
  nlsy, 
  y = glasses,
  include = c(sex_cat, race_eth_cat,
              eyesight_cat, glasses, age_bir),
  method = glm,
  method.args = list(family = binomial()),
  exponentiate = TRUE)
Characteristic N OR1 95% CI1 p-value
age_bir 5,813 1.02 1.01, 1.03 <0.001
sex_cat 8,450
    Male
    Female 1.97 1.81, 2.15 <0.001
race_eth_cat 8,450
    Hispanic
    Black 0.76 0.67, 0.86 <0.001
    Non-Black, Non-Hispanic 1.34 1.19, 1.50 <0.001
eyesight_cat 8,444
    Excellent
    Very good 0.93 0.84, 1.03 0.2
    Good 0.95 0.84, 1.07 0.4
    Fair 0.81 0.68, 0.96 0.016
    Poor 1.15 0.81, 1.63 0.4
1 OR = Odds Ratio, CI = Confidence Interval

Customizable just like tbl_summary()

Some regressions

linear_model <- lm(income ~ sex_cat + age_bir + race_eth_cat, 
                   data = nlsy)
linear_model_int <- lm(income ~ sex_cat*age_bir + race_eth_cat, 
                   data = nlsy)
logistic_model <- glm(glasses ~ eyesight_cat + sex_cat + income, 
                      data = nlsy, family = binomial())

gtsummary::tbl_regression()

tbl_regression(
  linear_model, 
  intercept = TRUE,
  label = list(
    sex_cat ~ "Sex",
    race_eth_cat ~ "Race/ethnicity",
    age_bir ~ "Age at first birth"
  ))
Characteristic Beta 95% CI1 p-value
(Intercept) 2,147 493, 3,802 0.011
Sex
    Male
    Female 25 -654, 705 >0.9
Age at first birth 438 381, 495 <0.001
Race/ethnicity
    Hispanic
    Black -772 -1,714, 171 0.11
    Non-Black, Non-Hispanic 7,559 6,676, 8,442 <0.001
1 CI = Confidence Interval
tbl_regression(
  logistic_model, 
  exponentiate = TRUE,
  label = list(
    sex_cat ~ "Sex",
    eyesight_cat ~ "Eyesight",
    income ~ "Income"
  ))
Characteristic OR1 95% CI1 p-value
Eyesight
    Excellent
    Very good 0.92 0.82, 1.03 0.2
    Good 0.92 0.80, 1.05 0.2
    Fair 0.80 0.66, 0.98 0.028
    Poor 1.03 0.69, 1.53 0.9
Sex
    Male
    Female 2.04 1.85, 2.25 <0.001
Income 1.00 1.00, 1.00 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

You could put several together

tbl_no_int <- tbl_regression(
  linear_model, 
  intercept = TRUE,
  label = list(
    sex_cat ~ "Sex",
    race_eth_cat ~ "Race/ethnicity",
    age_bir ~ "Age at first birth"
  ))

tbl_int <- tbl_regression(
  linear_model_int, 
  intercept = TRUE,
  label = list(
    sex_cat ~ "Sex",
    race_eth_cat ~ "Race/ethnicity",
    age_bir ~ "Age at first birth",
    `sex_cat:age_bir` ~ "Sex/age interaction"
  ))

You could put several together

tbl_merge(list(tbl_no_int, tbl_int), 
          tab_spanner = c("**Model 1**", "**Model 2**"))
Characteristic Model 1 Model 2
Beta 95% CI1 p-value Beta 95% CI1 p-value
(Intercept) 2,147 493, 3,802 0.011 4,064 1,884, 6,245 <0.001
Sex
    Male
    Female 25 -654, 705 >0.9 -3,635 -6,432, -838 0.011
Age at first birth 438 381, 495 <0.001 364 285, 443 <0.001
Race/ethnicity
    Hispanic
    Black -772 -1,714, 171 0.11 -759 -1,701, 183 0.11
    Non-Black, Non-Hispanic 7,559 6,676, 8,442 <0.001 7,550 6,668, 8,433 <0.001
Sex/age interaction
    Female * Age at first birth 149 39, 260 0.008
1 CI = Confidence Interval

Inline text

Instead of copying and pasting numbers from the table, pull them off automatically:

inline_text(tbl_no_int, sex_cat, level = "Female")

which produces: 25 (95% CI -654, 705; p>0.9)

and which can be further customized:

inline_text(tbl_no_int, sex_cat, level = "Female", pattern = "$\\beta = {estimate}$")

which produces: \(\beta = 25\)

bonus: broom::tidy()

bind_rows(
  broom::tidy(linear_model, conf.int = TRUE),
  broom::tidy(linear_model_int, conf.int = TRUE),
  .id = "model"
) |>
  mutate(model = factor(model, 
                        labels = c("main terms", "sex-age interaction"))) |> 
  ggplot(aes(x = term, y = estimate, 
             ymin = conf.low, ymax = conf.high,
             color = model)) +
  geom_point(position = position_dodge(width = .5)) +
  geom_errorbar(position = position_dodge(width = .5))