나무위키 여론조사 데이터 중 갤럽 조사 내용을 긁어와서 대권 후보 여론조사 결과를 데이터프레임으로 준비한다.
library(tidyverse)
library(rvest)
## 나무에서 여론조사 긁어오기 -----------------------------------------------------
Sys.setlocale("LC_ALL", "C")
"https://namu.wiki/w/제20대 대통령 선거/여론조사"
namu_url <- namu_url %>%
namu_html <- read_html(encoding = "utf-8")
2019 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[10]/div[1]/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
2020 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[13]/div/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
2021 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[16]/div/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
Sys.setlocale("LC_ALL", "Korean")
## 데이터 정제 작업 -----------------------------------------------------------
2019_tbl <- gallop_2019 %>%
gallop_ set_names(gallop_2019 %>% slice(1)) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2019)
2020_tbl <- gallop_2020 %>%
gallop_ set_names(gallop_2020 %>% slice(1)) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2020)
2021_tbl <- gallop_2021 %>%
gallop_ set_names(gallop_2021 %>% slice(1)) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2021)
비교를 위해 후보자 추출 단계를 진행한다.
c(names(gallop_2019_tbl) %>% setdiff(c("월", "연도")),
names(gallop_2020_tbl) %>% setdiff(c("월", "연도")),
names(gallop_2021_tbl) %>% setdiff(c("월", "연도"))) %>%
table() %>%
sort(decreasing = TRUE)
c("안철수", "오세훈", "이낙연", "이재명", "홍준표", "윤석열") candidates <-
2019_df <- gallop_2019_tbl %>%
gallop_ select(연도, 월, all_of(candidates %>% setdiff("윤석열"))) %>%
mutate(윤석열 = NA) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0)
2020_df <- gallop_2020_tbl %>%
gallop_ select(연도, 월, all_of(candidates)) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0)
2021_df <- gallop_2021_tbl %>%
gallop_ select(연도, 월, all_of(candidates)) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0) %>%
group_by(연도, 월) %>%
summarise_all(mean) %>%
ungroup()
bind_rows(gallop_2019_df, gallop_2020_df) %>%
gallop_df <- bind_rows(gallop_2021_df)
gallop_df %>%
gallop_tbl <- mutate(년월 = lubridate::make_date(year = 연도, month = 월, day = 15)) %>%
select(년월, 안철수:윤석열)
%>%
gallop_tbl write_csv("data/gallop_tbl.csv")
library(tidyverse)
gallop_tbl <- read_csv("data/gallop_tbl.csv")
%>%
gallop_tbl reactable::reactable()
"https://w.namu.la/s/2e95ac847ec8efbe47f0ad7dbe51d33c6746a10eebafaac383a0d80290634f262506488ed6c0e811822c25ca586c0c0b52c83b9ebe3a8a38813b88b1077b43a90b1774262bdc96d1e98c033eee478dc2142b892d889466e16b3dc3c0c9421bbd0a8820f74e5cdad0eb69c395e11ed9be"
안철수 <- "https://w.namu.la/s/66dc586d13b34736a7da81a300027a0c0a5cf4204ebd7b9263752256a6854e02419925fd096c0fe8406157bef4df52dc7307031bb67968ab5c37a7c1d758eaa45367136d16f293a422dee53502262431c03e397b3e1bd080b8769cab8e250f1216f3c8f1e93dfb1c22cba5408fb8a017"
오세훈 <- "https://w.namu.la/s/86688a03b1b55f21c021332ecded85c4b50043b3a07cb7ccfadd2b446c2758102831e3fa9abdbac4c3e5f171a9c7ce9ddcf917a89f64881ff672aca01f7456576636667eb6485cfd8cb0a29f0faad9040a60c9f86c1302420b39c93fe1af9adc768db82dd624d3c4f1dc8dcf3afa5c60"
이낙연 <- "https://w.namu.la/s/ed1bce056fcf1f4476a74d523a6a1790c4042217afb6ef2157f3ad5dd64ebd385acf48131579e8f904615f79a4d37861ab66ec1245f0d7bb3428b1f101015edb73dbf6a942126ee094695a4b2c56b970ec16ac755fc288c8799b79615d59d6b7db38c6cd41bb5154403a7f3058091a4e"
이재명 <- "https://w.namu.la/s/08a72765f4b9ac4da868da2b286777bbe62971f5fca5cb74a957f0e734b11506838e9953887efb88a39002b7951892f6e204a624de069fab5d11752a3ebb62601715763976c8141ed4ffc690aafcd4e4a760ab3bceda07fef9026c9a4d2d195411330792b0e320716c7c45781d61b53f"
홍준표 <- "https://w.namu.la/s/f87b603533b2894dd4a412765e0e28f79b57aaa036ca72e87096114eb2b9e1bf59cf0abdc6af964eef9434a6a657ff1787392d5e10d9d6c95f62efd8837a96e81cdaad7c76bbf4d3f9c89b392bfdef076e4c525dc7c28839a5cfdeb0dbed682d"
윤석열 <-
download.file(안철수, destfile = "fig/안철수.png", mode = "wb")
download.file(오세훈, destfile = "fig/오세훈.png", mode = "wb")
download.file(이낙연, destfile = "fig/이낙연.png", mode = "wb")
download.file(이재명, destfile = "fig/이재명.png", mode = "wb")
download.file(홍준표, destfile = "fig/홍준표.png", mode = "wb")
download.file(윤석열, destfile = "fig/윤석열.png", mode = "wb")
library(magick)
image_read("fig/안철수.png")
ACS <- image_read("fig/오세훈.png")
OSH <- image_read("fig/이낙연.png")
LNY <- image_read("fig/이재명.png")
LJM <- image_read("fig/홍준표.png")
HJP <- image_read("fig/윤석열.png")
YSM <-
image_append(c(ACS, OSH, LNY, LJM, HJP, YSM)) %>%
image_resize("900")
%>%
gallop_tbl pivot_longer(안철수:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = factor(후보, levels=c("안철수", "오세훈", "이낙연", "이재명", "홍준표", "윤석열"))) %>%
ggplot(aes(x=년월, y=지지율, color = 후보)) +
geom_line() +
geom_point() +
theme_minimal(base_family = "NanumGothic") +
scale_color_manual(values = c("gray50", "gray50", "midnightblue", "blue", "gray50", "red")) +
labs(x="",
title = "제20대 대통령 선거/여론조사") +
theme(legend.position = "top") +
guides(colour = guide_legend(nrow = 1))
library(gganimate)
library(extrafont)
loadfonts()
gallop_tbl %>%
polls_gg <- pivot_longer(안철수:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = factor(후보, levels=c("안철수", "오세훈", "이낙연", "이재명", "홍준표", "윤석열"))) %>%
ggplot(aes(x = 년월, y = 지지율, color = 후보, group = 후보)) +
geom_line() +
geom_point(size = 2) +
scale_color_manual(values = c("gray50", "gray50", "midnightblue", "blue", "gray50", "red")) +
geom_text(aes(x = as.Date("2021-07-01"), label = as.character(후보), family="NanumGothic"), hjust = 0) +
geom_segment(aes(xend = as.Date("2021-07-01"), yend = 지지율),
linetype = 3, colour = "grey50") +
theme_bw(base_family = "NanumGothic") +
labs(x="",
title = "제20대 대통령 선거/여론조사") +
theme(legend.position = "none") +
scale_x_date(limits = c(as.Date("2019-09-01"), as.Date("2021-08-01")))
polls_gg
+
polls_gg transition_reveal(년월)
library(ggimage)
library(ggtext)
library(extrafont)
loadfonts()
1.618
asp_ratio <-
gallop_tbl %>%
gallop_profile_gg <- filter(년월 == max(년월)) %>%
pivot_longer(안철수:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = fct_reorder(후보, -지지율)) %>%
mutate(profile_href = glue::glue("fig/{후보}.png")) %>%
ggplot(aes(x=후보, y=지지율)) +
geom_col(width = 0.3) +
theme_minimal(base_family = "NanumGothic") +
labs(x="",
title = "제20대 대통령 선거/여론조사",
subtitle = "조사일자: 2021-05-15") +
geom_image(
aes(
x = 후보, y = 지지율,
image = profile_href,
asp = asp_ratio
),size = 0.07
+
) theme(aspect.ratio = 1/asp_ratio,
axis.text = element_text(colour = "grey50",
face = 'bold',
size = 13),
axis.title = element_text(colour = "black",
face = 'bold',
size = 15),
axis.title.y = element_text(size = 14,
colour = 'black',
face='bold',
angle = 00),
plot.title = element_text(size = 17,
face = 'bold'))
gallop_profile_gg
데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com