먼저 이미지 데이터와 라벨 데이터를 다운로드 받는다.
캡챠 이미지 데이터와 라벨 데이터를 결합해 보자.
library(glue)

# List the captcha image files. `pattern` is a regular expression (not a
# shell glob), so the dot is escaped and the match is anchored to the end of
# the file name.
filename <- list.files("data/captcha_tutorial/", pattern = "\\.jpeg$")

# Integer part of each file name ("4995.jpeg" -> 4995L). list.files() returns
# names in alphabetical order, so this number is what the rows are sorted by.
file_names_seq <- basename(filename) %>%
  str_remove(pattern = "\\.jpeg$") %>%
  as.integer()

# Read the labels explicitly as character so that leading zeros
# (e.g. "075695") survive instead of depending on readr's type guessing.
train_labels <- read_csv(
  "data/captcha_tutorial/labels.txt",
  col_names = FALSE,
  col_types = cols(X1 = col_character())
) %>%
  rename(label = X1)

# Pair each image with its label by position: sort the images numerically,
# keep as many as there are labels, then bind the label column alongside.
# NOTE(review): assumes row i of labels.txt belongs to image i.jpeg -- confirm
# against the data set description.
captcha_df <- tibble(
  filename = filename,
  filename_seq = file_names_seq
) %>%
  arrange(filename_seq) %>%
  slice(seq_len(nrow(train_labels))) %>%
  bind_cols(train_labels) %>%
  mutate(image = glue("data/captcha_tutorial/{filename_seq}.jpeg"))

# Show the last rows to check that images and labels line up.
captcha_df %>% tail()
# A tibble: 6 x 4
filename filename_seq label image
<chr> <int> <chr> <glue>
1 4995.jpeg 4995 511236 data/captcha_tutorial/4995.jpeg
2 4996.jpeg 4996 144833 data/captcha_tutorial/4996.jpeg
3 4997.jpeg 4997 590583 data/captcha_tutorial/4997.jpeg
4 4998.jpeg 4998 891249 data/captcha_tutorial/4998.jpeg
5 4999.jpeg 4999 075695 data/captcha_tutorial/4999.jpeg
6 5000.jpeg 5000 209554 data/captcha_tutorial/5000.jpeg