선관위 웹사이트에 공개된 대선 공보물을 데이터로 만듭니다.
중앙선거관리위원회 선거정보도서관의 “후보자 선전물”에서 벽보, 공보, 공약서 등 후보자 선전물을 검색하여 데이터로 만듭니다.
가장 최근인 제19대 대통령 선거 공보물을 가져옵니다.
# Search-result page of the election library, queried with sign_id=19
# (the 19th presidential election).
public_info_url <- "http://elecinfo.nec.go.kr/neweps/3/1/paperSearch.do?chs=&page=&epType=&start_file_sn=&end_file_sn=&svc_path_nm=&epid=&candidate_nm=&elect_ymd=&epdata_id=&ctl_no_id=&elect_sn=&party_nm=&win_yn=&elecDe=&turnDe=%B4%EB%C5%EB%B7%C9%BC%B1%B0%C5&elecType=10&code_id=10++&elecTypeMax=20&turnType=EPS0410++&cityType=EPS041019&guType=&sign_id=19&sido_nm=&elect_region_nm=&fieldName=candidate_nm&category=tms120tbl&myScraps=&kwd=&elecTypes=10&electionType=on&electionTurn=on&pageSize=20&order=elect_ymd&order_sort=desc"

# Fetch and parse the result page.
public_info_html <- read_html(public_info_url)

# Collect the href attribute of every <a> inside the result list.
public_info_buttons <- html_attr(
  html_nodes(
    html_nodes(public_info_html, xpath = '//*[@id="content"]/div[5]/ul'),
    css = "a"
  ),
  "href"
)

# Keep only the links that carry a "start_file" parameter and that belong
# to document id ECM0120170001.
public_info_pdf_files <- public_info_buttons[
  str_detect(public_info_buttons, pattern = "start_file") &
    str_detect(public_info_buttons, pattern = "ECM0120170001")
]

# The hrefs are site-relative; prefix the host to get absolute URLs.
public_info_pdf_url <- glue::glue("http://elecinfo.nec.go.kr{public_info_pdf_files}")

# Print the resulting URLs.
public_info_pdf_url
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0001
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0010
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0012
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0014
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0015
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0002
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0003
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0004
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0005
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0006
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0007
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0008
http://elecinfo.nec.go.kr/neweps/common/download.do?svc_path_nm=/epdata/EP17/ECM2017/01/ECM0120170001&epid=ECM0120170001&start_file_sn=0009
# Scrape the download links listed on the election library's search page.
#
# p_number: election number interpolated into the search query (both the
#   `cityType` suffix and `sign_id`). For "19" and "18" the links are further
#   restricted to one fixed document id; for any other value no such
#   restriction is applied.
#
# Returns a glue character vector of absolute download URLs.
get_public_info_url <- function(p_number) {
  search_url <- glue::glue("http://elecinfo.nec.go.kr/neweps/3/1/paperSearch.do?chs=&page=&epType=&start_file_sn=&end_file_sn=&svc_path_nm=&epid=&candidate_nm=&elect_ymd=&epdata_id=&ctl_no_id=&elect_sn=&party_nm=&win_yn=&elecDe=&turnDe=%B4%EB%C5%EB%B7%C9%BC%B1%B0%C5&elecType=10&code_id=10++&elecTypeMax=20&turnType=EPS0410++&cityType=EPS0410{p_number}&guType=&sign_id={p_number}&sido_nm=&elect_region_nm=&fieldName=candidate_nm&category=tms120tbl&myScraps=&kwd=&elecTypes=10&electionType=on&electionTurn=on&pageSize=20&order=elect_ymd&order_sort=desc")

  # Fetch the page and pull the href of every <a> inside the result list.
  result_page <- read_html(search_url)
  result_list <- html_nodes(result_page, xpath = '//*[@id="content"]/div[5]/ul')
  hrefs <- html_attr(html_nodes(result_list, css = "a"), "href")

  # Only links carrying a "start_file" parameter are kept.
  download_links <- hrefs[str_detect(hrefs, pattern = "start_file")]

  # Pick the document id to keep for the two known elections.
  epid_pattern <- NULL
  if (p_number == "19") {
    epid_pattern <- "ECM0120170001"
  } else if (p_number == "18") {
    epid_pattern <- "ECM0120120040"
  }

  if (!is.null(epid_pattern)) {
    download_links <- download_links[str_detect(download_links, pattern = epid_pattern)]
  }

  # The hrefs are site-relative; prefix the host to get absolute URLs.
  glue::glue("http://elecinfo.nec.go.kr{download_links}")
}
# Ad-hoc check of get_public_info_url(): print the URLs it returns for the
# 19th election. The 18th-election call is left commented out.
# get_public_info_url("18")
get_public_info_url("19")
# Candidate table; its 선거 (election), 후보 (candidate) and 기호 (ballot
# number) columns are used below to select rows and name the saved files.
all_poster_tbl <- read_csv("data/all_poster_tbl.csv")

## 19th presidential election -------
# Strip the "기호 " prefix from the ballot number, keep the 19th-election
# rows, then attach one download URL and one target file name per row.
# NOTE(review): pdf_url is attached purely by position, so the rows of
# all_poster_tbl must be in the same order as the links returned by
# get_public_info_url("19") -- confirm against the scraped list.
public_info_19_tbl <- all_poster_tbl %>%
  mutate(기호 = str_remove_all(기호, "기호\\s+")) %>%
  filter(선거 == "19대") %>%
  mutate(
    pdf_url = get_public_info_url("19"),
    filename = glue::glue("data/public_info/{선거}_{후보}_{기호}.pdf")
  )

# download.file() cannot write into a directory that does not exist yet.
dir.create("data/public_info", recursive = TRUE, showWarnings = FALSE)

# Downloads are pure side effects, so walk2() is used instead of map2():
# it does not build and print a list of status codes. mode = "wb" keeps the
# PDFs binary-safe.
walk2(
  public_info_19_tbl$pdf_url,
  public_info_19_tbl$filename,
  download.file,
  mode = "wb"
)
## 18th presidential election -------
# Strip the "기호 " prefix from the ballot number, keep the 18th-election
# rows, then attach one download URL and one target file name per row.
# NOTE(review): pdf_url is attached purely by position, so the rows of
# all_poster_tbl must be in the same order as the links returned by
# get_public_info_url("18") -- confirm against the scraped list.
public_info_18_tbl <- all_poster_tbl %>%
  mutate(기호 = str_remove_all(기호, "기호\\s+")) %>%
  filter(선거 == "18대") %>%
  mutate(
    pdf_url = get_public_info_url("18"),
    filename = glue::glue("data/public_info/{선거}_{후보}_{기호}.pdf")
  )

# download.file() cannot write into a directory that does not exist yet.
dir.create("data/public_info", recursive = TRUE, showWarnings = FALSE)

# Downloads are pure side effects, so walk2() is used instead of map2():
# it does not build and print a list of status codes. mode = "wb" keeps the
# PDFs binary-safe.
walk2(
  public_info_18_tbl$pdf_url,
  public_info_18_tbl$filename,
  download.file,
  mode = "wb"
)
# Embed one of the downloaded PDFs (19th election, ballot number 1) in the
# rendered document.
knitr::include_graphics("data/public_info/19대_문재인_1번.pdf")
# Print the contents of the download directory as a tree.
fs::dir_tree(path = "data/public_info/")
data/public_info/
├── 18대_강지원_6번.pdf
├── 18대_김소연_5번.pdf
├── 18대_김순자_7번.pdf
├── 18대_문재인_2번.pdf
├── 18대_박근혜_1번.pdf
├── 18대_박종선_4번.pdf
├── 18대_이정희_3번.pdf
├── 19대_김민찬_15번.pdf
├── 19대_김선동_10번.pdf
├── 19대_문재인_1번.pdf
├── 19대_심상정_5번.pdf
├── 19대_안철수_3번.pdf
├── 19대_오영국_7번.pdf
├── 19대_유승민_4번.pdf
├── 19대_윤홍식_14번.pdf
├── 19대_이경희_12번.pdf
├── 19대_이재오_9번.pdf
├── 19대_장성민_8번.pdf
├── 19대_조원진_6번.pdf
└── 19대_홍준표_2번.pdf