# Install 'librarian' if it is not available. requireNamespace() is used
# instead of scanning installed.packages(), which is slow on large libraries
# and is not recommended for testing whether a single package is present.
if (!requireNamespace("librarian", quietly = TRUE)) {
  install.packages("librarian")
}
# Attach dplyr and broom via librarian::shelf()
librarian::shelf(dplyr, broom)
The 'cran_repo' argument in shelf() was not set, so it will use
cran_repo = 'https://cran.r-project.org' by default.
To avoid this message, set the 'cran_repo' argument to a CRAN
mirror URL (see https://cran.r-project.org/mirrors.html) or set
'quiet = TRUE'.
# Fix the RNG seed so the simulated data are reproducible
set.seed(1024)
# individual (true) dataset, with 100,000 rows:
# x is a rounded standard-normal draw; y is x + x^2 plus standard-normal
# noise, rounded to the nearest integer
x <- round(rnorm(1e5))
y <- round(x + x^2 + rnorm(1e5))
ind <- data.frame(x, y)
# aggregated dataset: one row per distinct (x, y) pair, with `freq` holding
# the number of individual rows that share that pair
agg <- ind |>
  dplyr::group_by(x, y) |>
  dplyr::summarize(freq = dplyr::n(), .groups = 'drop')
# Fit the same regression (y ~ x) three ways:
#   "True"           - on the individual-level data
#   "Aggregated"     - on the aggregated rows, each row counted once
#   "Aggregated & W" - on the aggregated rows, weighted by their frequency
models <- list(
  "True" = lm(y ~ x, data = ind),
  "Aggregated" = lm(y ~ x, data = agg),
  "Aggregated & W" = lm(y ~ x, data = agg, weights = freq)
)
# Coefficient table (with confidence intervals) for the individual-level fit
models[['True']] |> broom::tidy(conf.int = TRUE)
# A tibble: 2 × 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 1.08 0.00580 187. 0 1.07 1.10
2 x 1.01 0.00558 181. 0 0.998 1.02
# Coefficient table (with confidence intervals) for the unweighted aggregated fit
models[['Aggregated']] |> broom::tidy(conf.int = TRUE)
# A tibble: 2 × 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 5.51 0.717 7.69 8.74e-11 4.08 6.95
2 x 0.910 0.302 3.01 3.69e- 3 0.306 1.51
# Coefficient table (with confidence intervals) for the frequency-weighted aggregated fit
models[['Aggregated & W']] |> broom::tidy(conf.int = TRUE)
# A tibble: 2 × 7
term estimate std.error statistic p.value conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 1.08 0.224 4.84 0.00000795 0.637 1.53
2 x 1.01 0.216 4.68 0.0000145 0.579 1.44