library(tidyverse)
library(gtrendsR)
# Fetch worldwide Google Trends for deep learning frameworks (last 5 years).
# geo/hl are left commented so the query is global; the KR-specific query below
# enables them.
framework_raw <- gtrends(keyword = c("keras", "tensorflow", "pytorch", "huggingface"),
                         # geo = "KR",
                         # hl = "ko-KR",
                         time = "today+5-y")

# Cache the raw result so later runs don't re-hit the Google Trends API.
framework_raw %>%
  write_rds("data/framework_raw.rds")
# Reload the cached worldwide trends and plot search interest over time.
framework_raw <- read_rds("data/framework_raw.rds")

framework_raw$interest_over_time %>%
  as_tibble() %>%
  # hits may contain "<1" which as.integer() turns into NA with a warning —
  # NOTE(review): confirm that dropping those to NA is intended.
  mutate(hits = as.integer(hits)) %>%
  # Fix legend order of the frameworks.
  mutate(keyword = factor(keyword,
                          levels = c("keras", "pytorch", "tensorflow", "huggingface"))) %>%
  ggplot(aes(x = date, y = hits, group = keyword, color = keyword)) +
  geom_line() +
  labs(x = "", y = "검색수",
       title = "[전세계] 딥러닝 프레임워크 구글 검색 추세",
       color = "프레임워크") +
  theme_light() +
  theme(legend.position = "top")
# Same query restricted to South Korea (geo = "KR", Korean locale).
framework_kr_raw <- gtrends(keyword = c("keras", "tensorflow", "pytorch", "huggingface"),
                            geo = "KR",
                            hl = "ko-KR",
                            time = "today+5-y")

# Cache the KR result alongside the worldwide one.
framework_kr_raw %>%
  write_rds("data/framework_kr_raw.rds")
# Reload the cached KR trends and plot search interest over time.
framework_kr_raw <- read_rds("data/framework_kr_raw.rds")

framework_kr_raw$interest_over_time %>%
  as_tibble() %>%
  # hits may contain "<1" which as.integer() turns into NA with a warning —
  # NOTE(review): confirm that dropping those to NA is intended.
  mutate(hits = as.integer(hits)) %>%
  # Legend order differs from the worldwide plot (pytorch leads in KR).
  mutate(keyword = factor(keyword,
                          levels = c("pytorch", "tensorflow", "keras", "huggingface"))) %>%
  ggplot(aes(x = date, y = hits, group = keyword, color = keyword)) +
  geom_line() +
  labs(x = "", y = "검색수",
       title = "[대한민국] 딥러닝 프레임워크 구글 검색 추세",
       color = "프레임워크") +
  theme_light() +
  theme(legend.position = "top")
# 0. keras 와 tensorflow 패키지
library(keras)
# 1. Define the model architecture: a single dense unit with a sigmoid
#    activation on one input is exactly logistic regression.
logistic_reg <- keras_model_sequential() %>%
  layer_dense(units = 1,
              input_shape = 1,
              activation = "sigmoid")

# Compile the model: binary cross-entropy loss, Adam optimizer.
# NOTE: compile() modifies the keras model in place.
logistic_reg %>%
  compile(
    loss = "binary_crossentropy",
    optimizer = optimizer_adam(learning_rate = 0.01),
    metrics = list("accuracy")
  )

# Inspect the model (2 params: one weight + one bias)
summary(logistic_reg)
#> Model: "sequential"
#> ________________________________________________________________________________
#> Layer (type)                        Output Shape                    Param #
#> ================================================================================
#> dense (Dense)                       (None, 1)                       2
#> ================================================================================
#> Total params: 2
#> Trainable params: 2
#> Non-trainable params: 0
#> ________________________________________________________________________________
# Data
lr_raw <- read_rds("data/lr_tbl.rds")

# Preprocessing: standardize the predictor (z-score) so the optimizer
# converges faster and the coefficient is comparable to the GLM fit below.
lr_tbl <- lr_raw %>%
  mutate(학습시간 = scale(학습시간))

# Select predictor (x) and dependent variable (y) and convert to the
# matrix form keras::fit() expects.
x_train <- as.matrix(lr_tbl %>% select(학습시간))
y_train <- as.matrix(lr_tbl %>% select(입학여부))
# Train the model (500 epochs, silent; no validation split since the
# dataset is tiny and we compare against a GLM fit on the same data).
history <- logistic_reg %>% fit(
  x = x_train,
  y = y_train,
  epochs = 500,
  validation_split = 0,
  verbose = 0
)

# Loss/accuracy curves over epochs
plot(history)
# Fitted logistic regression coefficients:
# kernel = slope on the scaled predictor, bias = intercept.
logistic_reg$weights
#> [[1]]
#> <tf.Variable 'dense/kernel:0' shape=(1, 1) dtype=float32, numpy=array([[1.3267483]], dtype=float32)>
#> [[2]]
#> <tf.Variable 'dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.03166157], dtype=float32)>
# Fit a classical GLM (logit link) on the same data for comparison with
# the keras coefficients above.
glm_fit <- glm(입학여부 ~ scale(학습시간), data = lr_tbl, family = binomial)
glm_fit
#> Call: glm(formula = 입학여부 ~ scale(학습시간), family = binomial,
#>     data = lr_tbl)
#> Coefficients:
#> (Intercept)  scale(학습시간)
#>      0.1165           2.2677
#> Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
#> Null Deviance:       27.73
#> Residual Deviance: 16.06     AIC: 20.06
# tensorflow/keras 프레임워크 사용 ----
library(yardstick)

# Predict admission with the keras model and threshold the predicted
# probability at 0.5 to get a hard class label.
keras_pred_tbl <- lr_tbl %>%
  mutate(predicted_tf = predict(logistic_reg, x_train),
         class_tf = ifelse(predicted_tf < 0.5, 0, 1)) %>%
  # yardstick requires factor truth/estimate columns
  mutate(입학여부 = factor(입학여부),
         class_tf = factor(class_tf))

# Confusion matrix: keras predictions vs. ground truth
keras_pred_tbl %>%
  conf_mat(truth = 입학여부, estimate = class_tf,
           dnn = c("예측값", "참값"))
#>       참값
#> 예측값 0 1
#>      0 8 3
#>      1 2 7
# Overall accuracy of the keras model
keras_pred_tbl %>%
  accuracy(truth = 입학여부, estimate = class_tf)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary          0.75
# GLM 로지스틱 모형 사용 ----
# Refit the GLM (identical to the earlier fit, repeated here for the
# prediction comparison section).
glm_fit <- glm(입학여부 ~ scale(학습시간), data = lr_tbl, family = binomial)
glm_fit
#> Call: glm(formula = 입학여부 ~ scale(학습시간), family = binomial,
#>     data = lr_tbl)
#> Coefficients:
#> (Intercept)  scale(학습시간)
#>      0.1165           2.2677
#> Degrees of Freedom: 19 Total (i.e. Null); 18 Residual
#> Null Deviance:       27.73
#> Residual Deviance: 16.06     AIC: 20.06
# Predict with the GLM (type = "response" gives probabilities) and apply
# the same 0.5 threshold used for the keras model.
lr_pred_tbl <- lr_tbl %>%
  mutate(predicted_lr = predict(glm_fit, newdata = lr_tbl, type = "response")) %>%
  mutate(class_lr = ifelse(predicted_lr < 0.5, 0, 1)) %>%
  # yardstick requires factor truth/estimate columns
  mutate(입학여부 = factor(입학여부),
         class_lr = factor(class_lr))

# Confusion matrix: GLM predictions vs. ground truth
lr_pred_tbl %>%
  conf_mat(truth = 입학여부, estimate = class_lr,
           dnn = c("예측값", "참값"))
#>       참값
#> 예측값 0 1
#>      0 8 2
#>      1 2 8
# Overall accuracy of the GLM
lr_pred_tbl %>%
  accuracy(truth = 입학여부, estimate = class_lr)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary           0.8
# Export the trained model in HDF5 format (architecture + weights +
# optimizer state).
save_model_hdf5(logistic_reg, 'data/keras_lr_model.h5')

# Load the model back (e.g. in a fresh session)
library(keras)
lr_model <- load_model_hdf5('data/keras_lr_model.h5')
# Use the reloaded model on new data — weights match the trained model.
lr_model$weights

# NOTE(review): scale() here standardizes the NEW values by their OWN
# mean/sd, not by the training data's scaling parameters. For a production
# prediction the training mean/sd should be reused — confirm this
# shortcut is intentional for the demo.
predict(lr_model, scale(c(0.1, 5, 10)))
#>           [,1]
#> [1,] 0.2157518
#> [2,] 0.5056816
#> [3,] 0.7962232
# 데이터 과학자 이광춘 저작
# kwangchun.lee.7@gmail.com