Automotive News์ฌ์์ ๋งค๋ ์๋์ฐจ ๊ณต๊ธ์ฌ 100๊ฐ๋ฅผ ๋ฝ์์ ๋ฐ์ดํฐ๋ฅผ ์ ๊ณตํ๊ณ ์๋ค. Automotive News ์ ์ Top Supplier Rankings์ ๋ฐ์ดํฐ๊ฐ 2005๋ ๋ถํฐ ์ต๊ทผ๊น์ง ๋ฐ์ดํฐ๊ฐ ์ ์ ๋ฆฌ๋์ด ์๋ค.
์คํ ๋ด์ค ์น์ฌ์ดํธ์์ ๊ฐ์ ธ์จ ๋ฐ์ดํฐ๋ฅผ ๋ถ๋ฌ์ค๊ณ ๋ฐ์ดํฐ๋ถ์์ ํ์ํ ํฉํค์ง๋ฅผ ์ฌ์ ์ค๋นํ๋ค.
# 0. ํ๊ฒฝ์ค์ ----------------
# library(readxl)
# library(tidyverse)
# library(stringr)
# library(forcats)
# library(ggpubr)
# library(extrafont)
# loadfonts()
# library(xts)
# library(ggmap)
# library(leaflet)
# 1. ๋ฐ์ดํฐ ๋ถ๋ฌ์ค๊ธฐ ---------------
autonews_dat <- read_excel("data/CA105777620.XLS", sheet="Top 100 Global Suppliers (2016)", skip=1)
์์ ๋ก ๋ฐ์ดํฐ๊ฐ ์ ์ ๋ฆฌ๋์ด ์์ง๋ง ๋ฐ์ดํฐํ๋ ์ ํํ๋ก ๋ฐ์ดํฐ ๋ถ์ํ๊ธฐ ์ ์ ํ ํํ๋ก ๊ฐ๊ณตํ๋ค. ํนํ, ๊ฐ ๋ถํ๊ณต์ฌ๋ง๋ค ๊ณต๊ธํ๋ ์ ํ๊ณผ ์ฃผ์๋ฅผ ์ ์ ํ ์ ์ ๊ณผ์ ์ ๊ฑฐ์ณ ๊น๋ํ๊ฒ ์ ๋ฆฌํ๋ค.
# 2. ๋ฐ์ดํฐ ์ ์ ---------------
## 2.1. ์๋์ฐจ ๊ณต๊ธ์ฌ ๋ชฉ๋ก -----
autonews_df <- autonews_dat %>%
filter(!is.na(`2015\nrank`)) %>%
filter(row_number() <= 247)
## 2.2. ์๋์ฐจ ์ ์กฐํ๋ชฉ -----
autonews_product_df <- autonews_dat %>% select(`2015\nrank`, Products) %>%
filter(row_number() <= 247) %>%
mutate(`2015\nrank` = na.locf(`2015\nrank`)) %>% group_by(`2015\nrank`) %>%
summarise(items = paste0(Products, collapse = ",")) %>%
mutate(items = str_replace(items, ",NA$", ""))
## 2.2. ์๋์ฐจ ์ ์กฐํ๋ชฉ -----
autonews_address_df <- autonews_dat %>% select(`2015\nrank`, Address) %>%
filter(row_number() <= 247) %>%
mutate(`2015\nrank` = na.locf(`2015\nrank`)) %>% group_by(`2015\nrank`) %>%
summarise(address = paste0(Address, collapse = ",")) %>%
mutate(address = str_replace(address, ",NA", ""))
## 2.3. ์๋์ฐจ ๋ถํ์ฌ์ ์ ์กฐํ๋ชฉ ๋ณํฉ -----
autonews_df <- left_join(autonews_df, autonews_product_df, "2015\nrank") %>%
select(-X__1, -X__2, -X__3, -Products)
autonews_df <- left_join(autonews_df, autonews_address_df, "2015\nrank") %>%
select(-Address)
colnames(autonews_df) <- c(c("rank_2015", "company", "top_executive", "sales_2015",
"sales_2014", "NA_percent", "EU_percent", "AS_percent",
"ETC_percent", "rank_2014", "items", "address"))
autonews_df <- autonews_df %>%
mutate(NA_percent = ifelse(is.na(NA_percent), 0, NA_percent),
EU_percent = ifelse(is.na(EU_percent), 0, EU_percent),
AS_percent = ifelse(is.na(AS_percent), 0, AS_percent),
ETC_percent = ifelse(is.na(ETC_percent), 0, ETC_percent))
DT::datatable(autonews_df)
๋ณธ๊ฒฉ์ ์ธ ๋ฐ์ดํฐ ๋ถ์์ ์์ ๋ถํ๊ณต๊ธ์ฌ๊ฐ ์์นํ ์ฃผ์ ์ ๋ณด๋ฅผ ์ด์ฉํ์ฌ ์ ์ธ๊ณ ์ง๋์์ ํ๊ธฐํ๋ค. ์์ ggmap
ํฉํค์ง geocode()
ํจ์๋ฅผ ํ์ฉํ์ฌ ์๊ฒฝ๋ ์ ๋ณด๋ฅผ ๋ฐ์ ๋๊ณ ์ด๋ฅผ ์กฐ์ธํ์ฌ ์ง๋์์ ํ๊ธฐํ๋ค.
# 1. ๋ฐ์ดํฐ ๋ถ๋ฌ์ค๊ธฐ ---------------
supplier_df <- readRDS("data_processed/autonews_df.rds") %>%
mutate(address = str_replace(address, ",NA", ""))
supplier_lonlat <- readRDS("data_processed/supplier_lonlat.rds")
# 2. ์ง๋ฆฌ์ ๋ณด ---------------
## 2.1. ์ง๋ฆฌ์ ๋ณด ๋ฐ์ดํฐ: ์๋ ๊ฒฝ๋ ---
# supplier_lonlat <- geocode(supplier_df$address, source="google")
# saveRDS(supplier_lonlat, "data_processed/supplier_lonlat.rds")
supplier_lonlat_df <- bind_cols(supplier_df, supplier_lonlat)
# saveRDS(supplier_lonlat_df, "data_processed/supplier_lonlat_df.rds")
supplier_geo_df <- read_excel("data/supplier_address_csv.xlsx")
leaflet(data = supplier_geo_df) %>%
addProviderTiles(providers$OpenStreetMap) %>%
addMarkers(lng=~Longitude, lat=~Latitude,
popup = ~ as.character(paste0("<strong> ๊ธ๋ก๋ฒ ์๋์ฐจ ๋ถํ๊ณต๊ธ์ฌ </strong><br><br>",
"· ์์(2015): ", rank_2015, "<br>",
"· ํ์ฌ๋ช
: ", company, "<br>",
"· ์ฃผ์: ", address, "<br>",
"· ๋ํ: ", top_executive, "<br>",
"· ๋งค์ถ(๋ฐฑ๋ง$): ", scales::comma(sales_2015), "<br>",
"    · ๋ถ๋ฏธ: ", NA_percent, " %<br>",
"    · ์ ๋ฝ: ", EU_percent, " %<br>",
"    · ์์์: ", AS_percent, " %<br>",
"    · ๊ธฐํ: ", ETC_percent, " %<br>",
"· ๊ณต๊ธ๋ถํ: ", items, "<br>")), clusterOptions = markerClusterOptions())