환경성과지수 (EPI, Environmental Performance Index)는 예일대와 컬럼비아 대학에서 1999년부터 2005년 사이 공개한 Environmental Sustainability Index (ESI)를 계승한 것으로 세계경제포럼이 각국의 환경과 관련된 경제, 사회 정책을 종합적으로 평가하는 지수다.
가장 최근 지표구성에서 일부 변경사항이 있지만 2016년과 크게 차이가 나지 않는다. 가중치만 일부 차이가 있을 뿐이다.
library(tidyverse)
library(rvest)
library(httr)
# Download and parse the English Wikipedia article on the Environmental
# Performance Index. The parsed document is reused by the extraction steps
# that follow, so the page is fetched only once.
epi_html <- read_html(x = "https://en.wikipedia.org/wiki/Environmental_Performance_Index")
# Extract the first table inside the article body and convert it to a tibble
# with cleaned column names (janitor::clean_names()).
# NOTE(review): the XPath is positional (div[1]/table[1]); it will silently
# pick a different table if the Wikipedia page layout changes -- verify the
# result before relying on it.
epi_raw <- epi_html %>%
  html_node(xpath = '//*[@id="mw-content-text"]/div[1]/table[1]') %>%
  html_table(fill = FALSE, header = TRUE) %>%
  as_tibble() %>%
  janitor::clean_names()
# Tidy the scraped indicator table into one row per indicator, with the
# weight of each level (objective / issue / indicator) split out of its label
# into a numeric column.
epi_tbl <- epi_raw %>%
  # Cells whose text contains "Performance" are relabelled as the
  # "Ecosystem vitality (60%)" objective.
  # NOTE(review): presumably these come from merged cells repeating the table
  # caption -- confirm against the live page.
  mutate(objective = ifelse(str_detect(objective, "Performance"), "Ecosystem vitality (60%)", objective)) %>%
  # "Label (NN%)" -> label + numeric weight.
  separate(objective, into = c("objective", "obj_wgt"), sep = "\\(") %>%
  mutate(obj_wgt = parse_number(obj_wgt)) %>%
  # Blank out the same "Performance" cells in the issue column, then carry the
  # previous issue category forward into them.
  mutate(issue_category = ifelse(str_detect(issue_category, "Performance"), NA_character_, issue_category)) %>%
  # na.rm = FALSE keeps the vector the same length as the data frame;
  # the default (na.rm = TRUE) drops leading NAs and would make mutate() fail.
  mutate(issue_category = zoo::na.locf(issue_category, na.rm = FALSE)) %>%
  separate(issue_category, into = c("issue", "issue_wgt"), sep = "\\(") %>%
  mutate(issue_wgt = parse_number(issue_wgt)) %>%
  # The indicator weight is the trailing "(NN...)" group of the indicator
  # text; extract it as a number, then strip it from the label.
  mutate(ind_wgt = str_extract(indicator, pattern = "\\([0-9].*\\)$") %>% parse_number()) %>%
  mutate(indicator = str_remove(indicator, pattern = "\\([0-9].*\\)$"))

epi_tbl
# A tibble: 19 x 7
epi objective obj_wgt issue issue_wgt indicator ind_wgt
<chr> <chr> <dbl> <chr> <dbl> <chr> <dbl>
1 Environmen… "Environmen… 40 "Health… 33 "Environmental R… 100
2 Environmen… "Environmen… 40 "Air qu… 33 "Household Air Q… 30
3 Environmen… "Environmen… 40 "Air qu… 33 "Air pollution -… 30
4 Environmen… "Environmen… 40 "Air qu… 33 "Air pollution -… 30
5 Environmen… "Environmen… 40 "Air qu… 33 "Air pollution -… 10
6 Environmen… "Environmen… 40 "Water … 33 "Unsafe Sanitati… 50
7 Environmen… "Environmen… 40 "Water … 33 "Drinking Water … 50
8 Environmen… "Ecosystem … 60 "Water … 25 "Wastewater trea… 100
9 Environmen… "Ecosystem … 60 "Agricu… 10 "Nitrogen use ef… 75
10 Environmen… "Ecosystem … 60 "Agricu… 10 "Nitrogen balanc… 25
11 Environmen… "Ecosystem … 60 "Forest… 10 "Change in fores… 100
12 Environmen… "Ecosystem … 60 "Fisher… 5 "Fish stocks " 100
13 Environmen… "Ecosystem … 60 "Biodiv… 25 "Terrestrial Pro… 20
14 Environmen… "Ecosystem … 60 "Biodiv… 25 "Terrestrial pro… 20
15 Environmen… "Ecosystem … 60 "Biodiv… 25 "Marine protecte… 20
16 Environmen… "Ecosystem … 60 "Biodiv… 25 "Species protect… 20
17 Environmen… "Ecosystem … 60 "Biodiv… 25 "Species protect… 20
18 Environmen… "Ecosystem … 60 "Climat… 25 "Trend in carbon… 75
19 Environmen… "Ecosystem … 60 "Climat… 25 "Trend in CO2 em… 25
library(collapsibleTree)
# Interactive collapsible tree of the EPI structure: objective -> issue ->
# indicator, rooted at "EPI". The indicator weight column is passed as the
# node attribute and tooltips are enabled.
collapsibleTree(
  epi_tbl,
  root      = "EPI",
  hierarchy = c("objective", "issue", "indicator"),
  attribute = "ind_wgt",
  tooltip   = TRUE,
  collapsed = TRUE,
  zoomable  = FALSE,
  width     = 800
)
## 2020년 EPI 국가순위
# Collect the text of every <li> inside the selected container of the article.
# as_tibble() on the character vector yields a one-column tibble named `value`.
# NOTE(review): the selector depends on the container being the 19th child of
# the parser output; it breaks whenever the page is edited -- verify.
epi_2020_raw <- epi_html %>%
  html_node(css = '#mw-content-text > div.mw-parser-output > div:nth-child(19)') %>%
  html_nodes("li") %>%
  html_text() %>%
  str_trim() %>%
  as_tibble()

# Split each list entry into a country name and its score, and tag it with
# the year. Rank is taken from the row order.
# NOTE(review): assumes the list is already sorted by rank -- confirm.
# NOTE(review): the country pattern only matches leading ASCII letters and
# whitespace, so names containing hyphens, apostrophes or accented characters
# are cut short at that character.
epi_2020 <- epi_2020_raw %>%
  mutate(epi_score = parse_number(value)) %>%
  mutate(country = str_extract(value, "[a-zA-Z\\s]*") %>% str_trim()) %>%
  mutate(rank = row_number(),
         year = 2020) %>%
  select(year, rank, country, epi_score)
## 2018년 EPI 국가순위
# Read the second table in the article body, clean its column names, and keep
# the same four columns as the other yearly rankings.
# NOTE(review): the XPath is positional (table[2]) and the select() assumes
# the cleaned header contains `rank`, `country` and `epi_score` -- verify
# against the live page.
epi_2018 <- epi_html %>%
  html_node(xpath = '//*[@id="mw-content-text"]/div[1]/table[2]') %>%
  html_table(header = TRUE, fill = TRUE) %>%
  janitor::clean_names() %>%
  as_tibble() %>%
  mutate(year = 2018) %>%
  select(year, rank, country, epi_score)
## 2016년 EPI 국가순위
# Collect the text of every <li> inside the selected container of the article.
# as_tibble() on the character vector yields a one-column tibble named `value`.
# NOTE(review): the selector depends on the container being the 27th child of
# the parser output; it breaks whenever the page is edited -- verify.
epi_2016_raw <- epi_html %>%
  html_node(css = '#mw-content-text > div.mw-parser-output > div:nth-child(27)') %>%
  html_nodes("li") %>%
  html_text() %>%
  str_trim() %>%
  as_tibble()

# Split each list entry into a country name and its score, and tag it with
# the year. Rank is taken from the row order.
# NOTE(review): assumes the list is already sorted by rank -- confirm.
# NOTE(review): the country pattern only matches leading ASCII letters and
# whitespace, so names containing hyphens, apostrophes or accented characters
# are cut short at that character.
epi_2016 <- epi_2016_raw %>%
  mutate(epi_score = parse_number(value)) %>%
  mutate(country = str_extract(value, "[a-zA-Z\\s]*") %>% str_trim()) %>%
  mutate(rank = row_number(),
         year = 2016) %>%
  select(year, rank, country, epi_score)
## 2014년 EPI 국가순위
# Collect the text of every <li> inside the selected container of the article.
# as_tibble() on the character vector yields a one-column tibble named `value`.
# NOTE(review): the selector depends on the container being the 32nd child of
# the parser output; it breaks whenever the page is edited -- verify.
epi_2014_raw <- epi_html %>%
  html_node(css = '#mw-content-text > div.mw-parser-output > div:nth-child(32)') %>%
  html_nodes("li") %>%
  html_text() %>%
  str_trim() %>%
  as_tibble()

# Split each list entry into a country name and its score, and tag it with
# the year. Rank is taken from the row order.
# NOTE(review): assumes the list is already sorted by rank -- confirm.
# NOTE(review): the country pattern only matches leading ASCII letters and
# whitespace, so names containing hyphens, apostrophes or accented characters
# are cut short at that character.
epi_2014 <- epi_2014_raw %>%
  mutate(epi_score = parse_number(value)) %>%
  mutate(country = str_extract(value, "[a-zA-Z\\s]*") %>% str_trim()) %>%
  mutate(rank = row_number(),
         year = 2014) %>%
  select(year, rank, country, epi_score)
## 전체 -----------------------
# Stack the four yearly rankings (2020, 2018, 2016, 2014) into one long table.
# Note: this reuses the name `epi_tbl`, replacing the indicator table that was
# stored under the same name earlier in the document.
epi_tbl <- bind_rows(epi_2020, epi_2018, epi_2016, epi_2014)

# Persist the combined rankings so later steps can reload them without
# scraping again. The "data/" directory must already exist.
epi_tbl %>%
  write_rds("data/epi_tbl.rds")
library(ggflags) # remotes::install_github("ellisp/ggflags")
# Reload the combined yearly rankings saved earlier.
epi_tbl <- read_rds("data/epi_tbl.rds")

# Line chart of EPI score by year for a fixed set of countries, with each
# point drawn as the country's flag.
epi_tbl %>%
  # Shorten any "United States ..." label to "USA" so the filter below and
  # the legend use the short name.
  mutate(country = ifelse(str_detect(country, "United States"), "USA", country)) %>%
  filter(str_detect(country, pattern = "(Korea)|(Canada)|(France)|(Italy)|(USA)|(Germany)|(Japan)")) %>%
  # geom_flag() is given lower-cased two-letter country codes.
  mutate(flag = countrycode::countrycode(country, origin = 'country.name', destination = 'genc2c') %>% str_to_lower(.)) %>%
  # Order the legend from highest to lowest score.
  mutate(country = fct_reorder(country, -epi_score)) %>%
  ggplot(aes(x = year, y = epi_score, group = country, color = country)) +
    geom_line() +
    geom_point() +
    geom_flag(aes(country = flag), size = 5) +
    theme_minimal(base_family = "NanumGothic") +
    labs(x = "", y = "환경성과점수", color = "국가",
         title = "주요 경쟁국가 환경성과점수")
데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com