나무위키 여론조사 데이터 중 갤럽 조사 내용을 긁어와서 대권 후보 여론조사 결과를 데이터프레임으로 준비한다.
library(tidyverse)
library(rvest)
## 나무에서 여론조사 긁어오기 -----------------------------------------------------
Sys.setlocale("LC_ALL", "C")
"https://namu.wiki/w/제20대 대통령 선거/여론조사"
namu_url <- namu_url %>%
namu_html <- read_html(encoding = "utf-8")
2019 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[10]/div[1]/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
2020 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[13]/div/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
2021 <- namu_html %>%
gallop_ html_nodes(xpath = '//*[@id="app"]/div/div[2]/article/div[3]/div[2]/div/div/div[15]/div[1]/table') %>%
html_table() %>%
.[[1]] %>%
as_tibble()
Sys.setlocale("LC_ALL", "Korean")
## 데이터 정제 작업 -----------------------------------------------------------
2019_tbl <- gallop_2019 %>%
gallop_ set_names(gallop_2019 %>% slice(1)) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2019)
2020_tbl <- gallop_2020 %>%
gallop_ set_names(gallop_2020 %>% slice(1)) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2020)
2021_tbl <- gallop_2021 %>%
gallop_ set_names(gallop_2021 %>% slice(1)) %>%
janitor::clean_names(ascii = FALSE) %>%
select(!starts_with("na")) %>%
filter(str_detect(월, "^[0-9]")) %>%
mutate(연도 = 2021)
비교를 위해 후보자 추출 단계를 진행한다.
c(names(gallop_2019_tbl) %>% setdiff(c("월", "연도")),
names(gallop_2020_tbl) %>% setdiff(c("월", "연도")),
names(gallop_2021_tbl) %>% setdiff(c("월", "연도"))) %>%
table() %>%
sort(decreasing = TRUE)
c("이낙연", "이재명", "홍준표", "윤석열") candidates <-
2019_df <- gallop_2019_tbl %>%
gallop_ select(연도, 월, all_of(candidates %>% setdiff("윤석열"))) %>%
mutate(윤석열 = NA) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0)
2020_df <- gallop_2020_tbl %>%
gallop_ select(연도, 월, all_of(candidates)) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0)
2021_df <- gallop_2021_tbl %>%
gallop_ select(연도, 월, all_of(candidates)) %>%
mutate_if(is.character, parse_number) %>%
replace(., is.na(.), 0) %>%
group_by(연도, 월) %>%
summarise_all(mean) %>%
ungroup()
bind_rows(gallop_2019_df, gallop_2020_df) %>%
gallop_df <- bind_rows(gallop_2021_df)
gallop_df %>%
gallop_tbl <- mutate(년월 = lubridate::make_date(year = 연도, month = 월, day = 15)) %>%
select(년월, 이낙연:윤석열)
%>%
gallop_tbl write_csv("data/gallop_tbl_20210921.csv")
library(tidyverse)
gallop_tbl <- read_csv("data/gallop_tbl_20210921.csv")
%>%
gallop_tbl arrange(desc(년월)) %>%
reactable::reactable()
"https://w.namu.la/s/86688a03b1b55f21c021332ecded85c4b50043b3a07cb7ccfadd2b446c2758102831e3fa9abdbac4c3e5f171a9c7ce9ddcf917a89f64881ff672aca01f7456576636667eb6485cfd8cb0a29f0faad9040a60c9f86c1302420b39c93fe1af9adc768db82dd624d3c4f1dc8dcf3afa5c60"
이낙연 <- "https://w.namu.la/s/ed1bce056fcf1f4476a74d523a6a1790c4042217afb6ef2157f3ad5dd64ebd385acf48131579e8f904615f79a4d37861ab66ec1245f0d7bb3428b1f101015edb73dbf6a942126ee094695a4b2c56b970ec16ac755fc288c8799b79615d59d6b7db38c6cd41bb5154403a7f3058091a4e"
이재명 <- "https://w.namu.la/s/08a72765f4b9ac4da868da2b286777bbe62971f5fca5cb74a957f0e734b11506838e9953887efb88a39002b7951892f6e204a624de069fab5d11752a3ebb62601715763976c8141ed4ffc690aafcd4e4a760ab3bceda07fef9026c9a4d2d195411330792b0e320716c7c45781d61b53f"
홍준표 <- "https://w.namu.la/s/f87b603533b2894dd4a412765e0e28f79b57aaa036ca72e87096114eb2b9e1bf59cf0abdc6af964eef9434a6a657ff1787392d5e10d9d6c95f62efd8837a96e81cdaad7c76bbf4d3f9c89b392bfdef076e4c525dc7c28839a5cfdeb0dbed682d"
윤석열 <-
download.file(이낙연, destfile = "fig/이낙연.png", mode = "wb")
download.file(이재명, destfile = "fig/이재명.png", mode = "wb")
download.file(홍준표, destfile = "fig/홍준표.png", mode = "wb")
download.file(윤석열, destfile = "fig/윤석열.png", mode = "wb")
library(magick)
image_read("fig/이낙연.png")
LNY <- image_read("fig/이재명.png")
LJM <- image_read("fig/홍준표.png")
HJP <- image_read("fig/윤석열.png")
YSM <-
image_append(c(LNY, LJM, HJP, YSM)) %>%
image_resize("900")
%>%
gallop_tbl pivot_longer(이낙연:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = factor(후보, levels=c("이낙연", "이재명", "홍준표", "윤석열"))) %>%
mutate(후보 = fct_reorder(후보, 지지율, tail, n = 1, .desc = TRUE)) %>%
mutate(지지율 = 지지율 / 100) %>%
ggplot(aes(x=년월, y=지지율, color = 후보)) +
geom_line() +
geom_point() +
theme_minimal(base_family = "NanumGothic") +
scale_color_manual(values = c("blue", "red", "midnightblue", "pink")) +
labs(x="",
title = "제20대 대통령 선거/여론조사") +
theme(legend.position = "top") +
# guides(colour = guide_legend(nrow = 1)) +
scale_x_date(date_labels = "%y년 %m월") +
scale_y_continuous(labels = scales::percent) +
labs(x="",
title = "제20대 대통령 선거/여론조사",
subtitle = "2021년 9월 21일 기준",
caption = "자료: 나무위키 제20대 대통령 선거/여론조사 한국갤럽") +
theme(legend.position = "right",
plot.title=element_text(size=18, face="bold", family = "NanumBarunpen"),
plot.subtitle=element_text(face="bold", size=13, colour="grey10", family = "NanumBarunpen"))
library(gganimate)
library(extrafont)
loadfonts()
gallop_tbl %>%
polls_gg <- pivot_longer(이낙연:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = factor(후보, levels=c("이낙연", "이재명", "홍준표", "윤석열"))) %>%
mutate(후보 = fct_reorder(후보, 지지율, tail, n = 1, .desc = TRUE)) %>%
mutate(지지율 = 지지율 / 100) %>%
ggplot(aes(x = 년월, y = 지지율, color = 후보, group = 후보)) +
geom_line() +
geom_point(size = 2) +
scale_color_manual(values = c("blue", "red", "midnightblue", "pink")) +
geom_text(aes(x = as.Date("2021-11-01"), label = as.character(후보), family="NanumGothic"), hjust = 0) +
geom_segment(aes(xend = as.Date("2021-11-01"), yend = 지지율),
linetype = 3, colour = "grey50") +
theme_bw(base_family = "NanumGothic") +
labs(x="",
title = "제20대 대통령 선거/여론조사",
subtitle = "2021년 9월 21일 기준",
caption = "자료: 나무위키 제20대 대통령 선거/여론조사 한국갤럽") +
scale_x_date(date_labels = "%y년 %m월", limits = c(as.Date("2019-09-01"), as.Date("2021-12-15"))) +
scale_y_continuous(labels = scales::percent) +
theme(legend.position = "none",
plot.title=element_text(size=18, face="bold", family = "NanumBarunpen"),
plot.subtitle=element_text(face="bold", size=13, colour="grey10", family = "NanumBarunpen"))
polls_gg
+
polls_gg transition_reveal(년월)
library(ggimage)
library(ggtext)
library(extrafont)
loadfonts()
1.618
asp_ratio <-
gallop_tbl %>%
gallop_profile_gg <- filter(년월 == max(년월)) %>%
pivot_longer(이낙연:윤석열, names_to = "후보", values_to = "지지율") %>%
mutate(후보 = fct_reorder(후보, -지지율)) %>%
mutate(profile_href = glue::glue("fig/{후보}.png")) %>%
ggplot(aes(x=후보, y=지지율)) +
geom_col(width = 0.3) +
theme_minimal(base_family = "NanumGothic") +
labs(x="",
title = "제20대 대통령 선거/여론조사",
subtitle = "조사일자: 2021-09-21") +
geom_image(
aes(
x = 후보, y = 지지율,
image = profile_href,
asp = asp_ratio
),size = 0.07
+
) theme(aspect.ratio = 1/asp_ratio,
axis.text = element_text(colour = "grey50",
face = 'bold',
size = 13),
axis.title = element_text(colour = "black",
face = 'bold',
size = 15),
axis.title.y = element_text(size = 14,
colour = 'black',
face='bold',
angle = 00),
plot.title = element_text(size = 17,
face = 'bold'))
gallop_profile_gg
데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com