# Plot life expectancy against every other column of korea_tbl, one facet per
# column. The table is reshaped to long form first so that a single
# ggplot() call can draw all panels. Facets use facet_wrap()'s default
# (shared) axes here.
korea_tbl %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # lifeExp becomes a (variable, value) pair.
  pivot_longer(-lifeExp, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = value, y = lifeExp)) +
  geom_point() +
  geom_line() +
  facet_wrap(~variable)
# Same life-expectancy-vs-predictor plot as a faceted chart, but with
# independent axes per facet, rotated x tick labels, and axis/title labels.
korea_tbl %>%
  # pivot_longer() replaces the superseded gather(); every column except
  # lifeExp becomes a (variable, value) pair.
  pivot_longer(-lifeExp, names_to = "variable", values_to = "value") %>%
  ggplot(aes(x = value, y = lifeExp)) +
  geom_point() +
  geom_line() +
  # The argument is `scales`; the previous `scale =` only worked through
  # partial argument matching.
  facet_wrap(~variable, scales = "free") +
  # Rotate x tick labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "",
    y = "Life Expectancy",
    title = "Relationship between Life Expectancy and main factors"
  )
Hint: see the Stack Overflow question “Rotating and spacing axis labels in ggplot2”.
Call:
lm(formula = lifeExp ~ year, data = korea_tbl)
Residuals:
Min 1Q Median 3Q Max
-2.2745 -0.3700 0.0935 0.6660 1.7765
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.034e+03 3.888e+01 -26.61 1.3e-10 ***
year 5.554e-01 1.964e-02 28.28 7.1e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.174 on 10 degrees of freedom
Multiple R-squared: 0.9877, Adjusted R-squared: 0.9864
F-statistic: 799.8 on 1 and 10 DF, p-value: 7.104e-11
broom
# A tibble: 1 x 11
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
<dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 0.988 0.986 1.17 800. 7.10e-11 2 -17.9 41.7 43.2
# ... with 2 more variables: deviance <dbl>, df.residual <int>
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -1034. 38.9 -26.6 1.30e-10
2 year 0.555 0.0196 28.3 7.10e-11
.resid
Min. :-2.2745
1st Qu.:-0.3700
Median : 0.0935
Mean : 0.0000
3rd Qu.: 0.6660
Max. : 1.7765
# A tibble: 1 x 11
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
<dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 0.997 0.996 0.663 845. 2.41e-10 4 -9.66 29.3 31.7
# ... with 2 more variables: deviance <dbl>, df.residual <int>
# A tibble: 4 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -1940. 588. -3.30 0.0108
2 year 1.02 0.305 3.35 0.0101
3 pop -0.455 0.427 -1.07 0.317
4 gdpPercap -0.000555 0.000198 -2.80 0.0232
# Regress lifeExp on every other column of korea_tbl, then hand the full
# model to MASS::stepAIC() for AIC-based term selection (trace = FALSE turns
# off the per-step log).
korea_full_lm <- lm(formula = lifeExp ~ ., data = korea_tbl)

# The outer parentheses assign and print the selected model in one statement.
(korea_step <- MASS::stepAIC(korea_full_lm, trace = FALSE))
Call:
lm(formula = lifeExp ~ year + gdpPercap, data = korea_tbl)
Coefficients:
(Intercept) year gdpPercap
-1.317e+03 6.998e-01 -3.595e-04
# Build a nested table with one row per country: every remaining column of
# gapminder (continent is dropped) is packed into a list-column by nest().
gap_nest <- gapminder %>%
  group_by(country) %>%
  select(-continent) %>%
  nest()
## Hello model
# Fit lifeExp ~ year separately for each country's nested data, keep the
# broom::glance() summary of each fit, pull out its r.squared, and sort the
# countries from lowest to highest R-squared. The outer parentheses print the
# result as well as assigning it.
(gap_many_tbl <- gap_nest %>%
  mutate(
    model = map(data, ~ lm(lifeExp ~ year, data = .)),
    model_glance = map(model, broom::glance),
    # Extract the r.squared column of each one-row glance tibble by name.
    rsquare = map_dbl(model_glance, "r.squared")
  ) %>%
  arrange(rsquare))
# A tibble: 142 x 5
# Groups: country [142]
country data model model_glance rsquare
<fct> <list<df[,4]>> <list> <list> <dbl>
1 Rwanda [12 x 4] <lm> <tibble [1 x 11]> 0.0172
2 Botswana [12 x 4] <lm> <tibble [1 x 11]> 0.0340
3 Zimbabwe [12 x 4] <lm> <tibble [1 x 11]> 0.0562
4 Zambia [12 x 4] <lm> <tibble [1 x 11]> 0.0598
5 Swaziland [12 x 4] <lm> <tibble [1 x 11]> 0.0682
6 Lesotho [12 x 4] <lm> <tibble [1 x 11]> 0.0849
7 Cote d'Ivoire [12 x 4] <lm> <tibble [1 x 11]> 0.283
8 South Africa [12 x 4] <lm> <tibble [1 x 11]> 0.312
9 Uganda [12 x 4] <lm> <tibble [1 x 11]> 0.342
10 Congo, Dem. Rep. [12 x 4] <lm> <tibble [1 x 11]> 0.348
# ... with 132 more rows
# gap_many_tbl is sorted by ascending rsquare, so the first three countries
# are the poorest linear fits and the last three are the best ones.
worst_country <- as.character(head(gap_many_tbl$country, 3))
best_country <- as.character(tail(gap_many_tbl$country, 3))

# Worst fits first, then best fits.
countries <- c(worst_country, best_country)
# Plot life expectancy over time for the selected `countries`, one facet per
# country, coloured by whether the country is in `best_country` ("Best") or
# not ("Worst").
gapminder %>%
  filter(country %in% countries) %>%
  select(country, year, lifeExp) %>%
  mutate(
    # Re-level so facets follow the order of `countries`.
    country = fct_relevel(as.character(country), countries),
    worst_best = if_else(country %in% best_country, "Best", "Worst")
  ) %>%
  ggplot(aes(x = year, y = lifeExp, color = worst_best)) +
  geom_point() +
  geom_line() +
  # The argument is `scales`; the previous `scale =` only worked through
  # partial argument matching.
  facet_wrap(~country, scales = "free") +
  # Rotate x tick labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # NOTE(review): the title appears to be copied from the earlier
  # predictor plot; this chart shows lifeExp against year per country.
  # Confirm the intended wording.
  labs(
    x = "",
    y = "Life Expectancy",
    title = "Relationship between Life Expectancy and main factors"
  )
# Plot life expectancy over time for the selected `countries`, one facet per
# country, with each facet labelled "<country>, <R-squared rounded to 3 dp>".
gap_many_tbl %>%
  ungroup() %>%
  filter(country %in% countries) %>%
  select(country, data, rsquare) %>%
  unnest(data) %>%
  select(country, year, lifeExp, rsquare) %>%
  mutate(
    country = as.character(country),
    country = glue::glue("{country}, {round(rsquare, 3)}"),
    # Order the facet labels by R-squared. Previously the factor levels were
    # set with fct_relevel() *before* glue() rebuilt the column as plain
    # text, so the ordering was discarded and facets came out alphabetically.
    country = fct_reorder(as.character(country), rsquare)
  ) %>%
  ggplot(aes(x = year, y = lifeExp)) +
  geom_point() +
  geom_line() +
  # The argument is `scales`; the previous `scale =` only worked through
  # partial argument matching.
  facet_wrap(~country, scales = "free") +
  # Rotate x tick labels by 90 degrees.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # The former `color = ""` label was dropped: no colour aesthetic is mapped
  # in this plot, so it had no effect.
  # NOTE(review): the title appears to be copied from the earlier
  # predictor plot; this chart shows lifeExp against year per country.
  # Confirm the intended wording.
  labs(
    x = "",
    y = "Life Expectancy",
    title = "Relationship between Life Expectancy and main factors"
  )