1 환경 성과 지수

환경성과지수 (EPI, Environmental Performance Index)는 예일대와 컬럼비아대에서 1999년부터 2005년 사이 공개한 Environmental Sustainability Index (ESI)를 계승한 것으로 세계경제포럼이 각국의 환경과 관련된 경제, 사회 정책을 종합적으로 평가하는 지수다.

1.1 지표 구성

가장 최근 지표구성에서 일부 변경사항이 있지만 2016년과 크게 차이가 나지 않는다. 가중치만 일부 차이가 있을 뿐이다.

library(tidyverse)
library(rvest)
library(httr)

# Fetch the Wikipedia article once; `epi_html` is reused by the later
# scraping chunks.
epi_html <- read_html("https://en.wikipedia.org/wiki/Environmental_Performance_Index")

# Pull the first table under the first <div> of #mw-content-text, treat its
# first row as the header, and normalise the column names with janitor.
epi_raw <- html_node(
  epi_html,
  xpath = '//*[@id="mw-content-text"]/div[1]/table[1]'
) %>%
  html_table(header = TRUE, fill = FALSE) %>%
  as_tibble() %>%
  janitor::clean_names()

# Tidy the scraped indicator table: every "label (NN%)" cell is split into a
# label column and a numeric weight column, at the objective, issue and
# indicator level.
epi_tbl <- epi_raw %>%
  mutate(
    # Cells containing "Performance" are relabelled as the second objective.
    objective = ifelse(
      str_detect(objective, "Performance"),
      "Ecosystem vitality (60%)",
      objective
    )
  ) %>%
  separate(objective, into = c("objective", "obj_wgt"), sep = "\\(") %>%
  mutate(
    obj_wgt = parse_number(obj_wgt),
    # Blank out issue cells containing "Performance" so they can be filled
    # from the row above.
    issue_category = ifelse(
      str_detect(issue_category, "Performance"),
      NA,
      issue_category
    )
  ) %>%
  # tidyr::fill() carries the last non-missing value downwards and, unlike
  # zoo::na.locf() with its default na.rm = TRUE, never drops a leading NA
  # (which would change the column length and make mutate() fail).
  fill(issue_category, .direction = "down") %>%
  separate(issue_category, into = c("issue", "issue_wgt"), sep = "\\(") %>%
  mutate(
    issue_wgt = parse_number(issue_wgt),
    # The weight is the trailing "(NN...)" group; extract it before it is
    # stripped from the indicator label on the next line.
    ind_wgt = parse_number(str_extract(indicator, pattern = "\\([0-9].*\\)$")),
    indicator = str_remove(indicator, pattern = "\\([0-9].*\\)$")
  )

epi_tbl
# A tibble: 19 x 7
   epi         objective    obj_wgt issue    issue_wgt indicator         ind_wgt
   <chr>       <chr>          <dbl> <chr>        <dbl> <chr>               <dbl>
 1 Environmen… "Environmen…      40 "Health…        33 "Environmental R…     100
 2 Environmen… "Environmen…      40 "Air qu…        33 "Household Air Q…      30
 3 Environmen… "Environmen…      40 "Air qu…        33 "Air pollution -…      30
 4 Environmen… "Environmen…      40 "Air qu…        33 "Air pollution -…      30
 5 Environmen… "Environmen…      40 "Air qu…        33 "Air pollution -…      10
 6 Environmen… "Environmen…      40 "Water …        33 "Unsafe Sanitati…      50
 7 Environmen… "Environmen…      40 "Water …        33 "Drinking Water …      50
 8 Environmen… "Ecosystem …      60 "Water …        25 "Wastewater trea…     100
 9 Environmen… "Ecosystem …      60 "Agricu…        10 "Nitrogen use ef…      75
10 Environmen… "Ecosystem …      60 "Agricu…        10 "Nitrogen balanc…      25
11 Environmen… "Ecosystem …      60 "Forest…        10 "Change in fores…     100
12 Environmen… "Ecosystem …      60 "Fisher…         5 "Fish stocks "        100
13 Environmen… "Ecosystem …      60 "Biodiv…        25 "Terrestrial Pro…      20
14 Environmen… "Ecosystem …      60 "Biodiv…        25 "Terrestrial pro…      20
15 Environmen… "Ecosystem …      60 "Biodiv…        25 "Marine protecte…      20
16 Environmen… "Ecosystem …      60 "Biodiv…        25 "Species protect…      20
17 Environmen… "Ecosystem …      60 "Biodiv…        25 "Species protect…      20
18 Environmen… "Ecosystem …      60 "Climat…        25 "Trend in carbon…      75
19 Environmen… "Ecosystem …      60 "Climat…        25 "Trend in CO2 em…      25
library(collapsibleTree)

# Interactive tree of the index: EPI -> objective -> issue -> indicator.
# The indicator weight column is supplied as the node attribute and shown
# via tooltips; the tree starts collapsed and zooming is disabled.
collapsibleTree(
  epi_tbl,
  root = "EPI",
  hierarchy = c("objective", "issue", "indicator"),
  attribute = "ind_wgt",
  tooltip = TRUE,
  collapsed = TRUE,
  zoomable = FALSE,
  width = 800
)

2 EPI 국가 순위

2.1 데이터 크롤링

## Helpers shared by the list-based rankings (2020, 2016, 2014) ----------

# Return the trimmed text of every <li> below the node matched by `css` as a
# one-column tibble (`value`), one row per list item, in document order.
scrape_rank_items <- function(html, css) {
  items <- html %>%
    html_node(css = css) %>%
    html_nodes("li") %>%
    html_text() %>%
    str_trim()

  # The nth-child selectors depend on the page layout; make a silent miss
  # visible instead of quietly producing an empty ranking.
  if (length(items) == 0) {
    warning("No list items found for selector: ", css, call. = FALSE)
  }

  # tibble() replaces the superseded as_tibble(<character vector>); the
  # resulting column is still named `value`.
  tibble(value = items)
}

# Convert scraped list items into (year, rank, country, epi_score).
# `rank` is the position of the item in the list.
tidy_rank_items <- function(raw, epi_year) {
  raw %>%
    mutate(
      epi_score = parse_number(value),
      # Leading run of letters (any script, incl. accents), whitespace and
      # the punctuation that occurs inside country names (. ' ’ -), so names
      # such as "Côte d'Ivoire" or "Guinea-Bissau" are not cut short. A
      # dangling separator before the score is stripped afterwards.
      country = value %>%
        str_extract("^[\\p{L}\\p{M}\\s.'\u2019-]*") %>%
        str_remove("[\\s-]+$") %>%
        str_trim(),
      rank = row_number(),
      year = epi_year
    ) %>%
    select(year, rank, country, epi_score)
}

## 2020 EPI country ranking
epi_2020_raw <- scrape_rank_items(
  epi_html,
  css = '#mw-content-text > div.mw-parser-output > div:nth-child(19)'
)

epi_2020 <- tidy_rank_items(epi_2020_raw, epi_year = 2020)

## 2018 EPI country ranking (taken from the second table, not a list)
epi_2018 <- epi_html %>%
  html_node(xpath = '//*[@id="mw-content-text"]/div[1]/table[2]') %>%
  html_table(header = TRUE, fill = TRUE) %>%
  janitor::clean_names() %>%
  as_tibble() %>%
  mutate(year = 2018) %>%
  select(year, rank, country, epi_score)

## 2016 EPI country ranking
epi_2016_raw <- scrape_rank_items(
  epi_html,
  css = '#mw-content-text > div.mw-parser-output > div:nth-child(27)'
)

epi_2016 <- tidy_rank_items(epi_2016_raw, epi_year = 2016)

## 2014 EPI country ranking
epi_2014_raw <- scrape_rank_items(
  epi_html,
  css = '#mw-content-text > div.mw-parser-output > div:nth-child(32)'
)

epi_2014 <- tidy_rank_items(epi_2014_raw, epi_year = 2014)

## Combined ------------------
# Stack the yearly rankings (2020, 2018, 2016, 2014 in that row order) and
# save the result for the plotting step.
epi_tbl <- bind_rows(epi_2020, epi_2018, epi_2016, epi_2014)

write_rds(epi_tbl, "data/epi_tbl.rds")

2.2 시각화

library(ggflags) # remotes::install_github("ellisp/ggflags")
# Reload the combined rankings saved by the scraping step.
epi_tbl <- read_rds(file = "data/epi_tbl.rds")

# Line chart of EPI score by year for a fixed set of countries, with a flag
# drawn on top of each point.
epi_tbl %>%
  # Shorten any name containing "United States" to "USA" before filtering.
  mutate(
    country = ifelse(str_detect(country, "United States"), "USA", country)
  ) %>%
  filter(
    str_detect(
      country,
      pattern = "(Korea)|(Canada)|(France)|(Italy)|(USA)|(Germany)|(Japan)"
    )
  ) %>%
  mutate(
    # Lower-cased 'genc2c' code, used as the flag aesthetic below; it is
    # computed from the character name before `country` becomes a factor.
    flag = str_to_lower(
      countrycode::countrycode(
        country,
        origin = "country.name",
        destination = "genc2c"
      )
    ),
    country = fct_reorder(country, -epi_score)
  ) %>%
  ggplot(aes(x = year, y = epi_score, group = country, color = country)) +
  geom_line() +
  geom_point() +
  geom_flag(aes(country = flag), size = 5) +
  theme_minimal(base_family = "NanumGothic") +
  labs(
    title = "주요 경쟁국가 환경성과점수",
    x = "",
    y = "환경성과점수",
    color = "국가"
  )

 

데이터 과학자 이광춘 저작

kwangchun.lee.7@gmail.com