공보물

선관위 웹사이트에 공개된 대선 공보물을 데이터로 만듭니다.

공보물 PDF 데이터

중앙선거관리위원회 선거정보도서관의 “후보자 선전물”에서 벽보, 공보, 공약서 등 후보자 선전물을 검색하여 데이터로 만듭니다.

제 19 대 대통령

library(tidyverse)
library(rvest)
library(httr)

# Load the previously saved poster table for the 19th presidential election
# from CSV. NOTE(review): this object is not referenced again in the visible
# part of the document -- confirm it is still needed.
poster_president_19_tbl <- read_csv("data/poster_president_19_tbl.csv")

대통령 공보물 가져오기

가장 최근 제19대 대통령 선거 공보물을 가져온다.

벽보 공보물 위치

# Search-result page on elecinfo.nec.go.kr for the 19th presidential election
# (cityType=EPS041019, sign_id=19). The turnDe value is already
# percent-encoded, so the URL is used verbatim.
public_info_url <- "http://elecinfo.nec.go.kr/neweps/3/1/paperSearch.do?chs=&page=&epType=&start_file_sn=&end_file_sn=&svc_path_nm=&epid=&candidate_nm=&elect_ymd=&epdata_id=&ctl_no_id=&elect_sn=&party_nm=&win_yn=&elecDe=&turnDe=%B4%EB%C5%EB%B7%C9%BC%B1%B0%C5&elecType=10&code_id=10++&elecTypeMax=20&turnType=EPS0410++&cityType=EPS041019&guType=&sign_id=19&sido_nm=&elect_region_nm=&fieldName=candidate_nm&category=tms120tbl&myScraps=&kwd=&elecTypes=10&electionType=on&electionTurn=on&pageSize=20&order=elect_ymd&order_sort=desc"

# Fetch and parse the result page.
public_info_html <- read_html(public_info_url)

# Collect the href of every <a> inside the result list.
public_info_buttons <- public_info_html %>%
  html_nodes(xpath = '//*[@id="content"]/div[5]/ul') %>%
  html_nodes(css = "a") %>%
  html_attr("href")

# Keep only the download links. str_subset() omits NA hrefs (anchors without
# an href attribute); logical indexing with str_detect() would carry them
# along as NA and glue() below would render them as "...krNA".
public_info_pdf_files <- str_subset(
  public_info_buttons,
  pattern = "start_file"
)

# Narrow down to links that mention ECM0120170001, the epid value that appears
# in the download URLs of this election.
public_info_pdf_files <- str_subset(
  public_info_pdf_files,
  pattern = "ECM0120170001"
)

# The hrefs are site-relative, so prefix the host to get absolute URLs.
public_info_pdf_url <- glue::glue("http://elecinfo.nec.go.kr{public_info_pdf_files}")

public_info_pdf_url
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0001
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0010
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0012
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0014
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0015
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0002
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0003
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0004
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0005
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0006
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0007
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0008
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0009

공보물 PDF 파일 함수

#' Collect election-booklet PDF download URLs for one presidential election
#'
#' Fetches the search-result page on elecinfo.nec.go.kr for the given
#' election, extracts the href of every link in the result list, keeps the
#' download links (those containing "start_file") and, for the 18th and 19th
#' elections, only the links that mention that election's epid value.
#'
#' @param p_number Election ordinal as a single string or number, e.g. `"19"`.
#'   It is interpolated into the `cityType` and `sign_id` query parameters.
#' @return A glue (character) vector of absolute download URLs, in the order
#'   the links appear on the page; zero-length when no link matches.
get_public_info_url <- function(p_number) {
  # The value is spliced into a single URL, so it has to be a scalar.
  stopifnot(length(p_number) == 1L)

  search_url <- glue::glue("http://elecinfo.nec.go.kr/neweps/3/1/paperSearch.do?chs=&page=&epType=&start_file_sn=&end_file_sn=&svc_path_nm=&epid=&candidate_nm=&elect_ymd=&epdata_id=&ctl_no_id=&elect_sn=&party_nm=&win_yn=&elecDe=&turnDe=%B4%EB%C5%EB%B7%C9%BC%B1%B0%C5&elecType=10&code_id=10++&elecTypeMax=20&turnType=EPS0410++&cityType=EPS0410{p_number}&guType=&sign_id={p_number}&sido_nm=&elect_region_nm=&fieldName=candidate_nm&category=tms120tbl&myScraps=&kwd=&elecTypes=10&electionType=on&electionTurn=on&pageSize=20&order=elect_ymd&order_sort=desc")

  hrefs <- search_url %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="content"]/div[5]/ul') %>%
    html_nodes(css = "a") %>%
    html_attr("href")

  # str_subset() omits NA hrefs (anchors without an href attribute); logical
  # indexing with str_detect() would keep them as NA and they would end up as
  # "...krNA" in the returned URLs.
  pdf_paths <- str_subset(hrefs, pattern = "start_file")

  # epid value found in the download links of each supported election.
  # as.character() keeps numeric input (19) working, since switch() would
  # otherwise treat a number as a position. Elections not listed here are
  # returned unfiltered, exactly as before.
  epid <- switch(
    as.character(p_number),
    "19" = "ECM0120170001",
    "18" = "ECM0120120040",
    NULL
  )
  if (!is.null(epid)) {
    pdf_paths <- str_subset(pdf_paths, pattern = epid)
  }

  # The hrefs are site-relative, so prefix the host to get absolute URLs.
  glue::glue("http://elecinfo.nec.go.kr{pdf_paths}")
}
# Example calls (each one performs a live HTTP request):
# get_public_info_url("18")
get_public_info_url("19")

공보물 PDF 다운로드

# Poster table covering all elections; the columns used below are
# 선거 (election), 후보 (candidate) and 기호 (ballot number).
all_poster_tbl <- read_csv("data/all_poster_tbl.csv")

# download.file() does not create missing directories, so make sure the
# target folder exists before the first download.
dir.create("data/public_info", recursive = TRUE, showWarnings = FALSE)

# Build the download plan for one election: the poster rows of that election
# plus the PDF URL and the local file name each PDF is saved under.
# NOTE(review): URLs are attached to rows purely by position -- confirm that
# the row order of the poster table matches the link order on the result page.
build_public_info_tbl <- function(poster_tbl, election_no) {
  poster_tbl %>%
    mutate(기호 = str_remove_all(기호, "기호\\s+")) %>%
    filter(선거 == paste0(election_no, "대")) %>%
    mutate(pdf_url = get_public_info_url(election_no)) %>%
    mutate(filename = glue::glue("data/public_info/{선거}_{후보}_{기호}.pdf"))
}

# Download every PDF in a plan built by build_public_info_tbl().
# walk2() is used because only the side effect (the saved file) matters;
# mode = "wb" keeps the binary PDF intact on Windows.
download_public_info <- function(info_tbl) {
  walk2(
    info_tbl$pdf_url,
    info_tbl$filename,
    function(url, path) download.file(url, destfile = path, mode = "wb")
  )
}

## 19th presidential election -------
public_info_19_tbl <- build_public_info_tbl(all_poster_tbl, "19")

download_public_info(public_info_19_tbl)

## 18th presidential election -------
public_info_18_tbl <- build_public_info_tbl(all_poster_tbl, "18")

download_public_info(public_info_18_tbl)

공보물 결합

# Embed one of the downloaded booklet PDFs (19th election, ballot number 1)
# in the rendered document.
knitr::include_graphics("data/public_info/19대_문재인_1번.pdf")

PDF 공보물 살펴보기

# Print the contents of the download folder as a tree to check which PDFs
# were saved.
fs::dir_tree(path = "data/public_info/")
data/public_info/
├── 18대_강지원_6번.pdf
├── 18대_김소연_5번.pdf
├── 18대_김순자_7번.pdf
├── 18대_문재인_2번.pdf
├── 18대_박근혜_1번.pdf
├── 18대_박종선_4번.pdf
├── 18대_이정희_3번.pdf
├── 19대_김민찬_15번.pdf
├── 19대_김선동_10번.pdf
├── 19대_문재인_1번.pdf
├── 19대_심상정_5번.pdf
├── 19대_안철수_3번.pdf
├── 19대_오영국_7번.pdf
├── 19대_유승민_4번.pdf
├── 19대_윤홍식_14번.pdf
├── 19대_이경희_12번.pdf
├── 19대_이재오_9번.pdf
├── 19대_장성민_8번.pdf
├── 19대_조원진_6번.pdf
└── 19대_홍준표_2번.pdf