Demographic statistics popularized by Hans Rosling’s TED talks.
library(tibble)
library(gapminder)
glimpse(gapminder)
Ah, rare data!
How JUICY they are!!
How…to let it tell?
library(tidyverse)
library(moments)
gapminder %>%
summarise(mean_life = mean(lifeExp),
median_life = median(lifeExp),
iqr_life = IQR(lifeExp),
# IQR = quantile(x, 3/4) - quantile(x, 1/4)
skew_life = skewness(lifeExp),
kurtosis_life = kurtosis(lifeExp))
library(broom)
sum_OneVar <- summary(gapminder$lifeExp) %>% tidy
library(summarytools)
freq(gapminder)
dfSummary(gapminder)
How does one variable relate to another?
library(summarytools)
gapminder$coldWar <- gapminder$year <= 1992
gapminder$asia <- gapminder$continent == "Asia"
ctable(gapminder$asia, gapminder$coldWar)
## Cross-Tabulation, Row Proportions
## asia * coldWar
## Data Frame: gapminder
##
## ------- --------- ------------- -------------- ---------------
## coldWar FALSE TRUE Total
## asia
## FALSE 327 (25.0%) 981 (75.0%) 1308 (100.0%)
## TRUE 99 (25.0%) 297 (75.0%) 396 (100.0%)
## Total 426 (25.0%) 1278 (75.0%) 1704 (100.0%)
## ------- --------- ------------- -------------- ---------------
Q: Does the average life expectancy changes before and after the Cold War?
\(H_{0}: \bar{LifeExpctancy}_{prio 1991} = \bar{LifeExpctancy}_{post 1991},\ \alpha = .05\)
gapminder$coldWar <- gapminder$year <= 1992
gapminder %>%
group_by(coldWar) %>%
summarise(mean(lifeExp))
t.test(gapminder$lifeExp[gapminder$year <= 1991], gapminder$lifeExp[gapminder$year > 1991])
t.test(gapminder$lifeExp[gapminder$year <= 1991], gapminder$lifeExp[gapminder$year > 1991], alternative = "greater", conf.level = .99)
Q: Does a country’s average life expectancy associate with its GDP per capital? (How about with populations?)
\(H_{0}: \rho_{(LifeExpectancy, GDP)} = 0,\ \alpha = .05\)
cor.test(gapminder$lifeExp, gapminder$gdpPercap)
select(gapminder, year, lifeExp, gdpPercap, pop) %>%
cor %>%
corrplot::corrplot.mixed()
Q: Does the average life expectancy vary across continents? (And over year?)
\(H_{0}: \mu_{Africa} = \mu_{Americas} = ... = \mu_{Oceania},\ \alpha = .05\)
aov(lifeExp ~ continent, data = gapminder) %>%
summary()
ANC(ovariate)OVA?
aov(lifeExp ~ continent + as.factor(year), data = gapminder) %>%
summary()
Q: How does a country’s average life expectancy change across continents and over time?
gapminder %>% count(lifeExp, continent, year)
lm(lifeExp ~ continent + year, data = gapminder) %>%
summary()
Controlling for time dependency?
\(t + t^2\)
lm(lifeExp ~ gdpPercap + year, data = gapminder) %>%
summary()
lm(lifeExp ~ gdpPercap + year + I(year^2), data = gapminder) %>%
summary()
Other transformations
sqrt(var)
, log(var)
library(arm)
lm(lifeExp ~ gdpPercap + year + I(year^2), data = gapminder) %>%
standardize() %>%
summary()
##
## Call:
## lm(formula = lifeExp ~ z.gdpPercap + z.year + I(z.year^2), data = gapminder)
##
## Residuals:
## Min 1Q Median 3Q Max
## -66.604 -6.867 1.140 7.552 21.267
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.5918 0.3526 171.842 < 2e-16 ***
## z.gdpPercap 13.2260 0.4801 27.550 < 2e-16 ***
## z.year 8.2472 0.4801 17.180 < 2e-16 ***
## I(z.year^2) -4.4719 1.0568 -4.231 2.45e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.646 on 1700 degrees of freedom
## Multiple R-squared: 0.4433, Adjusted R-squared: 0.4424
## F-statistic: 451.3 on 3 and 1700 DF, p-value: < 2.2e-16
Does the contribution of GDP per captia to a country’s life expectancy vary with different sizes of population?
lm(lifeExp ~ gdpPercap + year, data = gapminder) %>%
summary()
lm(lifeExp ~ gdpPercap * pop + year, data = gapminder) %>%
summary()
Hint: You must include both base terms in the model.
……
fit <- lm(lifeExp ~ gdpPercap + year, data = gapminder)
res <- resid(fit)
plot(fit, which = 1)
library(car)
outlierTest(fit)
# car::qqPlot(fit)
ncvTest(fit)
vif(fit)
durbinWatsonTest(fit)
library(performance)
model_performance(fit)
check_normality(fit)
check_outliers(fit)
glimpse(mtcars)
glm(vs ~ gear + wt + mpg, data = mtcars, family = "binomial") %>%
summary()
glm(vs ~ gear + wt + mpg, data = mtcars, family = "binomial") %>%
ggeffects::ggpredict()
polr(as.factor(gear) ~ wt + mpg, data = mtcars) %>%
summary()
library(ordinal)
clm(as.factor(gear) ~ wt + mpg, data = mtcars) %>%
summary()
library(nnet)
multinom(as.factor(cyl) ~ wt + mpg, data = mtcars) %>%
summary()
df_poisson <- data.frame(counts = c(18, 17, 15, 20, 10, 20, 25, 13, 12),
outcome = gl(3, 1, 9),
# Generate factors by specifying the pattern of their levels.
treatment = gl(3, 3))
glm(counts ~ outcome + treatment,
family = poisson(),
data = df_poisson) %>%
summary()
ctable
cor
aov
lm/glm/clm/multinom(Y ~ Xs, data)