library(tidyverse)
library(gtrendsR)
# Download worldwide Google Trends interest for four deep-learning
# frameworks over the last five years, cache the result as RDS, then
# reload from the cache so downstream steps use the saved copy.
framework_raw <- gtrends(keyword = c("keras", "tensorflow", "pytorch", "huggingface"),
                         # geo = "KR",
                         # hl = "ko-KR",
                         time = "today+5-y")

framework_raw %>%
  write_rds("data/framework_raw.rds")

framework_raw <-
  read_rds("data/framework_raw.rds")
# Plot worldwide search interest over time, one colored line per framework.
framework_raw$interest_over_time %>%
  as_tibble() %>%
  # NOTE(review): gtrendsR "hits" can contain values like "<1";
  # as.integer() turns those into NA with a warning — confirm intended.
  mutate(hits = as.integer(hits)) %>%
  mutate(keyword = factor(keyword, levels = c("keras", "pytorch", "tensorflow", "huggingface"))) %>%
  ggplot(aes(x = date, y = hits, group = keyword, color = keyword)) +
  geom_line() +
  labs(x = "", y = "검색수",
       title = "[전세계] 딥러닝 프레임워크 구글 검색 추세",
       color = "프레임워크") +
  theme_light() +
  theme(legend.position = "top")

# Same query restricted to South Korea (geo = "KR", Korean locale).
framework_kr_raw <- gtrends(keyword = c("keras", "tensorflow", "pytorch", "huggingface"),
                            geo = "KR",
                            hl = "ko-KR",
                            time = "today+5-y")
# Cache the Korean results and reload from disk.
framework_kr_raw %>%
  write_rds("data/framework_kr_raw.rds")

framework_kr_raw <-
  read_rds("data/framework_kr_raw.rds")
# Plot Korean search interest; factor levels are ordered differently from
# the worldwide plot so the legend reflects the Korean ranking.
framework_kr_raw$interest_over_time %>%
  as_tibble() %>%
  # NOTE(review): "<1" hits coerce to NA via as.integer() — confirm intended.
  mutate(hits = as.integer(hits)) %>%
  mutate(keyword = factor(keyword, levels = c("pytorch", "tensorflow", "keras", "huggingface"))) %>%
  ggplot(aes(x = date, y = hits, group = keyword, color = keyword)) +
  geom_line() +
  labs(x = "", y = "검색수",
       title = "[대한민국] 딥러닝 프레임워크 구글 검색 추세",
       color = "프레임워크") +
  theme_light() +
  theme(legend.position = "top")

# 0. keras and tensorflow packages
library(keras)
# 1. Define the model architecture: a single dense unit with a sigmoid
# activation on one input — i.e. plain logistic regression as a network.
logistic_reg <- keras_model_sequential() %>%
  layer_dense(units = 1,
              input_shape = 1,
              activation = "sigmoid")

# Compile the model: binary cross-entropy loss, Adam optimizer, accuracy metric.
logistic_reg %>%
  compile(
    loss = "binary_crossentropy",
    optimizer = optimizer_adam(learning_rate = 0.01),
    metrics = list("accuracy")
  )

# Inspect the model (2 trainable parameters: one weight + one bias).
summary(logistic_reg)
#> ________________________________________________________________________________
#> Layer (type)                        Output Shape                    Param #
#> ================================================================================
#> dense (Dense)                       (None, 1)                       2
#> ================================================================================
#> Total params: 2
#> Trainable params: 2
#> Non-trainable params: 0
#> ________________________________________________________________________________
# Data: study-hours vs admission outcomes.
lr_raw <- read_rds("data/lr_tbl.rds")

# Preprocessing: standardize the predictor.
# (scale() returns a one-column matrix; dplyr stores it as a matrix column.)
lr_tbl <- lr_raw %>%
  mutate(학습시간 = scale(학습시간))

# Split out predictor (x) and response (y) as matrices for keras.
x_train <- lr_tbl %>% select(학습시간) %>% as.matrix()
y_train <- lr_tbl %>% select(입학여부) %>% as.matrix()
# Train for 500 epochs on the full data set (no validation split),
# silently (verbose = 0), and plot the loss/accuracy curves.
history <- logistic_reg %>% fit(
  x = x_train,
  y = y_train,
  epochs = 500,
  validation_split = 0,
  verbose = 0
)

plot(history)

# Logistic regression model coefficients
logistic_reg$weights[[1]]
#> [[1]]
#> <tf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[1.3267483]], dtype=float32)>
#> [[2]]
#> <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.03166157], dtype=float32)>
# For comparison, fit the same logistic regression with base R's glm().
glm_fit <- glm(입학여부 ~ scale(학습시간), family = binomial, data = lr_tbl)
glm_fit
#> Call: glm(formula = 입학여부 ~ scale(학습시간), family = binomial,
#>     data = lr_tbl)
#> Coefficients:
#> (Intercept) scale(학습시간)
#>      0.1165          2.2677
#> Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
#> Null Deviance: 27.73
#> Residual Deviance: 16.06 AIC: 20.06
# tensorflow와 keras 프레임워크 사용 ----
library(yardstick)

# Keras predictions: probability, hard class at the 0.5 cutoff, and
# truth/prediction converted to factors for yardstick.
keras_pred_tbl <- lr_tbl %>%
  mutate(
    predicted_tf = predict(logistic_reg, x_train),
    class_tf = factor(ifelse(predicted_tf < 0.5, 0, 1)),
    입학여부 = factor(입학여부)
  )
# Confusion matrix for the keras classifier (rows = predicted, cols = truth).
keras_pred_tbl %>%
  conf_mat(truth = 입학여부, estimate = class_tf,
           dnn = c("예측값", "참값"))
#>       참값
#> 예측값 0 1
#>      0 8 3
#>      1 2 7
# Overall accuracy of the keras classifier.
keras_pred_tbl %>%
  accuracy(truth = 입학여부, estimate = class_tf)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary          0.75
# GLM 로지스틱 모형 사용 ----
# Refit the GLM logistic model (identical to the earlier fit) for the
# comparison that follows.
glm_fit <- glm(입학여부 ~ scale(학습시간), data = lr_tbl, family = binomial)
glm_fit
#> Call: glm(formula = 입학여부 ~ scale(학습시간), family = binomial,
#>     data = lr_tbl)
#> Coefficients:
#> (Intercept) scale(학습시간)
#>      0.1165          2.2677
#> Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
#> Null Deviance: 27.73
#> Residual Deviance: 16.06 AIC: 20.06
# GLM predictions on the response (probability) scale, thresholded at 0.5,
# with truth and prediction converted to factors for yardstick.
lr_pred_tbl <- lr_tbl %>%
  mutate(
    predicted_lr = predict(glm_fit, newdata = lr_tbl, type = "response"),
    class_lr = factor(ifelse(predicted_lr < 0.5, 0, 1)),
    입학여부 = factor(입학여부)
  )
# Confusion matrix for the GLM classifier (rows = predicted, cols = truth).
lr_pred_tbl %>%
  conf_mat(truth = 입학여부, estimate = class_lr,
           dnn = c("예측값", "참값"))
#>       참값
#> 예측값 0 1
#>      0 8 2
#>      1 2 8
# Overall accuracy of the GLM classifier.
lr_pred_tbl %>%
  accuracy(truth = 입학여부, estimate = class_lr)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary           0.8
# Export the trained model in HDF5 format.
logistic_reg %>%
  save_model_hdf5('data/keras_lr_model.h5')
# Reload the saved model from disk.
library(keras)
lr_model <- load_model_hdf5('data/keras_lr_model.h5')

# Check the restored parameters (kernel + bias) match the saved model.
# The pasted output below shows both [[1]] and [[2]], so the whole
# weights list was printed; the stray "[[1]]" had been fused onto the line.
lr_model$weights
#> [[1]]
#> <tf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[1.3267483]], dtype=float32)>
#> [[2]]
#> <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.03166157], dtype=float32)>
predict(lr_model, scale(c(0.1, 5, 10))) [,1]
#>           [,1]
#> [1,] 0.2157518
#> [2,] 0.5056816
#> [3,] 0.7962232
# 데이터 과학자 이광춘 저작
# kwangchun.lee.7@gmail.com