미국 도로교통안전국(NHTSA)은 미국에서 시판되는 자동차에 대한 리콜 및 고객불만 데이터를 NHTSA Office of Defects Investigation (ODI) - Recalls 웹API를 통해서 제공하고 있다.
R 팩키지도 공개되어 있어 openNHTSA를 통해서 별도 RESTful API 프로그래밍을 생략하고 R 코드로 자동차 리콜과 고객불만 데이터를 긁어올 수 있다.
openNHTSA 예제 코드를 참조하여 2001-2017년까지 현대차와 기아차에 대한 리콜과 불만 데이터를 위한 설정을 준비한다. 더불어 현대차와 기아차를 특정짓는 색상도 설정한다.
# 0. Environment setup ----------------
# library(openNHTSA) # devtools::install_github("statwonk/openNHTSA")
# library(tidyverse)
# library(stringr)
# library(forcats)
# library(ggpubr)
# library(extrafont)
# loadfonts()
## 1.0. Fetch NHTSA data
#' Fetch NHTSA records for one vehicle make across a range of model years.
#'
#' Builds one query per model year with the openNHTSA helpers
#' (`facility()`, `model_year()`, `vehicle_make()`), runs it with
#' `nhtsa_fetch()`, and stacks the per-year results into one data frame.
#'
#' @param maker Vehicle make handed to `vehicle_make()`; this script uses
#'   "hyundai" and "kia".
#' @param recall_complaint Facility name handed to `facility()`; this script
#'   uses "recalls" and "complaints".
#' @param years Model years to query, one request per element. Defaults to
#'   2001:2017, the range that used to be hard-coded in the loop.
#' @return A data frame holding the rows of every per-year result, in the
#'   order of `years`; an empty data frame when nothing was fetched.
crawl_nhtsa <- function(maker, recall_complaint, years = 2001:2017) {
  # Collect each year's result in a list and bind once at the end, rather
  # than re-copying the accumulated data frame on every iteration.
  yearly <- lapply(years, function(myear) {
    facility(recall_complaint) %>%
      model_year(myear) %>%
      vehicle_make(maker) %>%
      nhtsa_fetch()
  })
  # The leading empty data.frame() mirrors the accumulator the loop started
  # from, so an empty `years` (or all-empty results) still yields a data frame.
  bind_rows(c(list(data.frame()), yearly))
}
## 1.0. Hyundai / Kia brand colours
# Named character vector: names are the upper-case make labels, values are
# hex colours (KIA = red, HYUNDAI = blue). The plots in this script pass it
# to scale_color_manual() / scale_fill_manual() via `values =`.
hkmc_cols <- setNames(
  c("#ff0000", "#0000ff"),
  c("KIA", "HYUNDAI")
)
`crawl_nhtsa` 함수에 현대차(`hyundai`)와 기아차(`kia`) 그리고 리콜인자(`recalls`)를 넣어 데이터를 긁어오고 이를 바탕으로 연도별 현대기아차 리콜 횟수를 시각화한다.
# 1. Fetch recall data ----------------
# Query the "recalls" facility once per make through crawl_nhtsa(); the
# per-make results stay available as separate objects before being stacked.
## 1.1. Hyundai
hyundai_df <- crawl_nhtsa(maker = "hyundai", recall_complaint = "recalls")
## 1.2. Kia
kia_df <- crawl_nhtsa(maker = "kia", recall_complaint = "recalls")
## 1.3. Combine both makes (Hyundai rows first, then Kia)
recall_df <- bind_rows(list(hyundai_df, kia_df))
## 1.5. Visualise recalls per model year -------------
# Line chart with one series per make: number of recall records for each
# model year, coloured with the brand palette in hkmc_cols.
# Labels are Korean (UTF-8): y = "recall count (2001 - 2017)",
# title = "recall trend by manufacturer", legend = "manufacturer".
recall_df %>%
  count(Make, ModelYear) %>%
  # group = Make keeps one connected line per make.
  ggplot(aes(x = ModelYear, y = n, group = Make, color = Make)) +
  geom_line(size = 1.1, alpha = 0.3) +
  geom_point(size = 2, alpha = 0.7) +
  theme_pubr(base_family = "NanumGothic") +
  labs(
    x = "",
    y = "리콜횟수 (2001 - 2017년)",
    title = "자동차 제조사 리콜 추세",
    color = "제조사"
  ) +
  scale_color_manual(values = hkmc_cols)
`crawl_nhtsa` 함수에 현대차(`hyundai`)와 기아차(`kia`) 그리고 불만(`complaints`)인자를 넣어 데이터를 긁어오고 이를 바탕으로 연도별 현대기아차 불만 횟수를 시각화한다.
# 2. Fetch complaint data ----------------
# Same procedure as for recalls, but against the "complaints" facility.
## 2.1. Hyundai
hyundai_comp_df <- crawl_nhtsa(maker = "hyundai", recall_complaint = "complaints")
## 2.2. Kia
kia_comp_df <- crawl_nhtsa(maker = "kia", recall_complaint = "complaints")
## 2.3. Combine both makes (Hyundai rows first, then Kia)
complaint_df <- bind_rows(list(hyundai_comp_df, kia_comp_df))
# 3. Data visualisation -------------
## 3.1. Yearly trend by manufacturer
# Line chart with one series per make: number of complaint records for each
# model year, coloured with the brand palette in hkmc_cols.
# Labels are Korean (UTF-8): y = "complaint count",
# title = "complaint trend by manufacturer", legend = "manufacturer".
complaint_df %>%
  count(Make, ModelYear) %>%
  # group = Make keeps one connected line per make.
  ggplot(aes(x = ModelYear, y = n, group = Make, color = Make)) +
  geom_line() +
  geom_point() +
  theme_pubr(base_family = "NanumGothic") +
  labs(
    x = "",
    y = "불만횟수",
    title = "자동차 제조사 불만 추세",
    color = "제조사"
  ) +
  scale_color_manual(values = hkmc_cols)
2001 - 2017년 현대기아자동차 모델별 리콜과 불만 횟수를 세어보자.
# 3. Per-model visualisation -------------
## 3.1. Recalls per model
# Horizontal bar chart of recall records per model, filled by make and
# ordered by count.
# Labels are Korean (UTF-8): y = "recall count (2001 - 2017)",
# title = "Hyundai / Kia recall count", legend = "manufacturer".
recall_df %>%
  # Keep only rows whose Make contains HYUNDAI or KIA, the two names that
  # have an entry in hkmc_cols.
  filter(str_detect(Make, "HYUNDAI|KIA")) %>%
  count(Make, Model) %>%
  ggplot(aes(x = reorder(Model, n), y = n, fill = Make)) +
  # geom_col() draws bars from the precomputed counts in `n`.
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "리콜횟수(2001 - 2017년)",
    title = "현대 기아자동차 리콜횟수",
    fill = "제조사"
  ) +
  theme_pubr(base_family = "NanumGothic") +
  scale_fill_manual(values = hkmc_cols) +
  # Smaller axis text so the model names fit.
  theme(axis.text = element_text(size = 8))
## 3.2. Complaints per model
# Horizontal bar chart of complaint records per model, filled by make and
# ordered by count.
# Labels are Korean (UTF-8): y = "complaint count (2001 - 2017)",
# title = "Hyundai / Kia complaint count", legend = "manufacturer".
complaint_df %>%
  # Keep only rows whose Make contains HYUNDAI or KIA, the two names that
  # have an entry in hkmc_cols.
  filter(str_detect(Make, "HYUNDAI|KIA")) %>%
  count(Make, Model) %>%
  ggplot(aes(x = reorder(Model, n), y = n, fill = Make)) +
  # geom_col() draws bars from the precomputed counts in `n`.
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "불만횟수(2001 - 2017년)",
    title = "현대 기아자동차 불만횟수",
    fill = "제조사"
  ) +
  theme_pubr(base_family = "NanumGothic") +
  scale_fill_manual(values = hkmc_cols) +
  # Smaller axis text so the model names fit.
  theme(axis.text = element_text(size = 8))