class: center, middle, inverse, title-slide # 표에 대한 거의 모든 것 ### Tidyverse Korea, 이광춘, 2020-10-30 --- ## .left[🛠 발표 목차] <br> .alert[ .large[ - 범주형 자료 - 표 자료구조 - 시각화 - 모형 - 표 작성 ] ] --- class: inverse middle center .pull-left[ .animate__animated.animate__bounceInDown[ data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%" ] ] .pull-right[ <br> <br> # 범주형 자료 🔍 <hr> ### 자료구조 ### 요인(Factor) ### forcats ]
--- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] ### - 자료구조 ] .right-column[ ### Stevens, Stanley Smith. “On the theory of scales of measurement.” (1946) .center[ data:image/s3,"s3://crabby-images/45ff7/45ff70f9ad87ad9b6b936a4f1e42fc67686c6f61" alt=":scale 100%" ] .footnote[reference: https://statkclee.github.io/data-science/ds-data-structure.html] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] ### - 자료구조 ] .right-column[ ### R 자료구조 .center[ data:image/s3,"s3://crabby-images/f87e2/f87e2323c61e5bfaaf369a32c5a73661768a7386" alt=":scale 100%" ] .footnote[reference: https://statkclee.github.io/data-science/ds-data-structure.html] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] ### - 자료구조 ### - Factor ] .right-column[ ### 요인(Factor) ```r library(tidyverse) x1 <- c("Dec", "Apr", "Jan", "Mar") sort(x1) ``` ``` #> [1] "Apr" "Dec" "Jan" "Mar" ``` ```r month_levels <- c( "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") y1 <- factor(x1, levels = month_levels) y1 ``` ``` #> [1] Dec Apr Jan Mar #> Levels: Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ``` .footnote[reference: https://statkclee.github.io/data-science/ds-factor-cdata.html] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] ### - 자료구조 ### - Factor ### - forcats ] .right-column[ ### `forcats` - 요인 수준(factor level)에 대한 라벨(label) 바꿈 → `forcats::fct_recode`, `dplyr::recode` - 요인 수준 순서 바꾸기 → `fct_relevel()` - 요인 범주수준을 줄이기 → `if_else()` - 파생 요인 변수 생성시킴 → `case_when()` .footnote[reference: https://statkclee.github.io/data-science/ds-factor-cdata.html] ] --- class: inverse middle center .pull-left[ 
.animate__animated.animate__bounceInDown[ data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%" ] ] .pull-right[ <br> <br> # 표 자료구조 🔍 <hr> ## 큰 그림 ### Case-by-Case ### Frequency ### Table ### 변환 ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] ### 큰 그림 ] .right-column[ ### 큰 그림 .center[ data:image/s3,"s3://crabby-images/1203c/1203c9acc4eed08d3a07b697d081da207268c434" alt=":scale 60%" ] .footnote[reference: https://cran.r-project.org/web/packages/DescTools/vignettes/TablesInR.pdf] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] #### Case-by-Case ] .right-column[ ### Case-by-Case ```r library(DescTools) Untable(UCBAdmissions) %>% head ``` ``` #> Admit Gender Dept #> 1 Admitted Male A #> 2 Admitted Male A #> 3 Admitted Male A #> 4 Admitted Male A #> 5 Admitted Male A #> 6 Admitted Male A ``` .footnote[reference: https://cran.r-project.org/web/packages/DescTools/vignettes/TablesInR.pdf] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] #### Case-by-Case ### Frequency ] .right-column[ ### 빈도수 (Frequency) ```r data.frame(UCBAdmissions) %>% head ``` ``` #> Admit Gender Dept Freq #> 1 Admitted Male A 512 #> 2 Rejected Male A 313 #> 3 Admitted Female A 89 #> 4 Rejected Female A 19 #> 5 Admitted Male B 353 #> 6 Rejected Male B 207 ``` .footnote[reference: https://cran.r-project.org/web/packages/DescTools/vignettes/TablesInR.pdf] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] #### Case-by-Case ### Frequency ### Table ] .right-column[ ### 표(Table) ```r UCBAdmissions[,,Dept=c("A", "B")] ``` ``` #> , , Dept = A #> #> Gender #> Admit Male Female #> Admitted 512 89 #> 
Rejected 313 19 #> #> , , Dept = B #> #> Gender #> Admit Male Female #> Admitted 353 17 #> Rejected 207 8 ``` .footnote[reference: https://cran.r-project.org/web/packages/DescTools/vignettes/TablesInR.pdf] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/5b458/5b458fb06ae65816d93191d54605ffb50db78070" alt=":scale 50%"] #### Case-by-Case ### Frequency ### Table ### 변환 ] .right-column[ ### 변환 .center[ data:image/s3,"s3://crabby-images/38a96/38a968676fb1ed7f52be881fa0a85ec5fc7dcb3d" alt=":scale 80%" ] .footnote[reference: https://cran.r-project.org/web/packages/DescTools/vignettes/TablesInR.pdf] ] --- class: inverse middle center .pull-left[ .animate__animated.animate__bounceInDown[ data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%" ] ] .pull-right[ <br> <br> # 시각화 📊 <hr> ## 큰 그림 ## 깔끔한 데이터 ## ggplot ## ggmosaic ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%"] ### 큰 그림 ] .right-column[ ### 큰 그림 .center[ data:image/s3,"s3://crabby-images/d4f7a/d4f7af22b9bf28f3729fe89af1d61f652e4a4fb6" alt=":scale 100%" ] .footnote[reference: https://statkclee.github.io/data-science/ds-factor-dplyr-mosaic.html] ] --- class: middle .left-column[ .center[data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%"] ### 큰 그림 ] .right-column[ ### 큰 그림 .center[ data:image/s3,"s3://crabby-images/d4f7a/d4f7af22b9bf28f3729fe89af1d61f652e4a4fb6" alt=":scale 100%" ] .footnote[reference: https://statkclee.github.io/data-science/ds-factor-dplyr-mosaic.html] ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%" ] ### 큰 그림 ### Tidy 데이터 ] .right-column[ ### 깔끔한 데이터 ```r data("HairEyeColor") hair_df <- as_tibble(HairEyeColor) %>% janitor::clean_names() hair_df %>% sample_n(3) ``` ``` #> # A tibble: 3 x 
4 #> hair eye sex n #> <chr> <chr> <chr> <dbl> #> 1 Red Hazel Male 7 #> 2 Black Green Female 2 #> 3 Black Hazel Male 10 ``` .footnote[reference: https://statkclee.github.io/data-science/ds-factor-dplyr-mosaic.html] ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%" ] ### 큰 그림 ### Tidy 데이터 ### ggplot ] .right-column[ ### 깔끔한 데이터 .pull-left[ ```r hair_df %>% unite(appearance, hair:sex, sep="_") %>% filter(n > 10) %>% mutate(appearance = fct_reorder(appearance, n)) %>% ggplot(aes(x=appearance, y=n)) + geom_col() + coord_flip() + theme_bw() + labs(x="", y="") ``` ] .pull-right[ <img src="figure/unnamed-chunk-1-1.png" width="100%" style="display: block; margin: auto;" /> ] .footnote[reference: https://statkclee.github.io/data-science/ds-factor-dplyr-mosaic.html] ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/c323a/c323a23d7977aecc1fd83a5ecdf9a8e893dc9ccb" alt=":scale 50%" ] ### 큰 그림 ### Tidy 데이터 ### ggplot ### `ggmosaic` ] .right-column[ ### 모자이크 플롯 .pull-left[ ```r library(ggmosaic) ggplot(data = hair_df) + geom_mosaic(aes(weight = n, x= product(hair), conds=product(sex), fill=eye)) + theme_minimal() ``` ] .pull-right[ <img src="figure/unnamed-chunk-2-1.png" width="100%" style="display: block; margin: auto;" /> ] .footnote[reference: https://statkclee.github.io/data-science/ds-factor-dplyr-mosaic.html] ] --- class: inverse middle center .pull-left[ .animate__animated.animate__bounceInDown[ data:image/s3,"s3://crabby-images/18747/187476b212c3b66c482e79009e95ede71eb30b2d" alt=":scale 50%" ] ] .pull-right[ <br> <br> # 모형 💻 <hr> ### V2 로켓 ### 통계 검정 ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/18747/187476b212c3b66c482e79009e95ede71eb30b2d" alt=":scale 100%" ] ### V2 로켓 ] .right-column[ ### V2 로켓 
🚀 .pull-left[ ```r library(spatstat) par(mar = rep(0, 4)) hit <- 537 area <- 576 lambda <- hit/area south_london <- rpoispp(lambda, win = owin(c(0, 24), c(0, 24))) plot(south_london, main="", cex=0.5) abline(h = 0:24, v = 0:24, col = "lightgray", lty = 3) ``` ] .pull-right[ <img src="figure/unnamed-chunk-3-1.png" width="100%" style="display: block; margin: auto;" /> ] .footnote[reference: https://statkclee.github.io/statistics/stat-flying-bomb-poisson.html] ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/18747/187476b212c3b66c482e79009e95ede71eb30b2d" alt=":scale 100%" ] ### V2 로켓 ### 통계 검정 ] .right-column[ ### 통계 검정 ```r library(infer) gss_cat_df <- gss_cat %>% filter(year==2014) %>% mutate(marital = fct_lump(marital, 1), race = fct_lump(race, 1)) %>% mutate(marital = fct_recode(marital, Non_Married = "Other"), race = fct_recode(race, Non_White = "Other")) ## 교차표 ----- gss_cat_df %>% count(marital, race) %>% spread(marital, n) ``` .footnote[reference: https://statkclee.github.io/statistics/stat-categorical-data-chisquared.html] ] --- class: inverse middle center .pull-left[ .animate__animated.animate__bounceInDown[ data:image/s3,"s3://crabby-images/8ec16/8ec16267458578d85cea985bc694bfca67785cf7" alt=":scale 50%" ] ] .pull-right[ <br> <br> # 표 작성 📖 <hr> ### gt ### reactable ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/8ec16/8ec16267458578d85cea985bc694bfca67785cf7" alt=":scale 50%" ] ### `gt` ] .right-column[ ### `gt` data:image/s3,"s3://crabby-images/6ebf3/6ebf33076481c4ef83366dd31fad19c3b0c650e2" alt="" .footnote[reference: https://statkclee.github.io/data-science/ds-table-gt-kable.html] ] --- class: middle .left-column[ .center[ data:image/s3,"s3://crabby-images/73460/734608e677df3084b03719eff4a086d9ff1f4de1" alt=":scale 50%" ] ### `gt` ### `reactable` ] .right-column[ ### `reactable`
.footnote[reference: https://statkclee.github.io/ds-authoring/krila-digital-new-deal.html] ] --- class: center middle inverse # 감사합니다. -- # 질문? -- # 끝 --- class: center middle inverse .animate__animated.animate__hinge[ # 감사합니다. # 질문? # 끝 ]