R | Journal Entry Test | Fast track 블로그에 게시된 형 회계사님 데이터를 참고한다.
library(tidyverse)
library(readxl)
je_raw <- read_csv("data/je/je.csv", locale=locale('ko',encoding='euc-kr'))
je_tbl <- je_raw %>% 
  mutate(JEDATE = lubridate::ymd(JEDATE)) %>% 
  mutate(요일 = lubridate::wday(JEDATE, label = TRUE)) library(skimr)
skim(je_tbl)| Name | je_tbl | 
| Number of rows | 309043 | 
| Number of columns | 23 | 
| _______________________ | |
| Column type frequency: | |
| character | 9 | 
| Date | 1 | 
| factor | 1 | 
| numeric | 12 | 
| ________________________ | |
| Group variables | None | 
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace | 
|---|---|---|---|---|---|---|---|
| ACCT_NM | 0 | 1.00 | 2 | 16 | 0 | 211 | 0 | 
| 거래처[코드] | 2172 | 0.99 | 5 | 5 | 0 | 5909 | 0 | 
| 결의사원[코드] | 0 | 1.00 | 10 | 10 | 0 | 18 | 0 | 
| 유형번호[유형명] | 0 | 1.00 | 4 | 4 | 0 | 5 | 0 | 
| 증빙구분[구분명] | 222980 | 0.28 | 2 | 12 | 0 | 9 | 0 | 
| INSERT_ID | 178226 | 0.42 | 10 | 10 | 0 | 17 | 0 | 
| 입력일자 | 178226 | 0.42 | 21 | 22 | 0 | 20885 | 0 | 
| MODIFY_ID | 772 | 1.00 | 10 | 10 | 0 | 10 | 0 | 
| MODIFY_DT | 772 | 1.00 | 21 | 22 | 0 | 13253 | 0 | 
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique | 
|---|---|---|---|---|---|---|
| JEDATE | 0 | 1 | 2017-01-01 | 2017-12-31 | 2017-07-10 | 326 | 
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts | 
|---|---|---|---|---|---|
| 요일 | 0 | 1 | TRUE | 7 | Tue: 76543, Mon: 65055, Wed: 53221, Thu: 50741 | 
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist | 
|---|---|---|---|---|---|---|---|---|---|---|
| 구 분[기표][번호] | 0 | 1.00 | 7.468400e+02 | 8.968800e+02 | 1 | 57 | 321 | 1135 | 3236 | ▇▂▁▁▁ | 
| JENO | 0 | 1.00 | 6.520718e+10 | 8.598464e+10 | 201701011 | 2017092610 | 20170616160 | 201702221968 | 201712311321 | ▇▁▁▁▃ | 
| LN_SQ | 0 | 1.00 | 6.761000e+01 | 1.906500e+02 | 1 | 1 | 2 | 4 | 997 | ▇▁▁▁▁ | 
| DRCR_FG | 0 | 1.00 | 3.670000e+00 | 4.800000e-01 | 1 | 3 | 4 | 4 | 4 | ▁▁▁▃▇ | 
| ACCTCD | 0 | 1.00 | 2.434893e+04 | 1.552477e+04 | 10100 | 10800 | 25301 | 40401 | 99800 | ▇▂▁▁▁ | 
| DR | 207399 | 0.33 | 2.856139e+06 | 5.391426e+07 | -1000000000 | 55000 | 77000 | 200000 | 4483053872 | ▇▁▁▁▁ | 
| CR | 101644 | 0.67 | 1.399763e+06 | 3.646970e+07 | -1777760000 | 9000 | 55000 | 99000 | 4483053872 | ▁▇▁▁▁ | 
| 구 분[결의][년/월/일] | 0 | 1.00 | 2.017068e+07 | 3.454500e+02 | 20170101 | 20170406 | 20170710 | 20171011 | 20171231 | ▇▆▆▆▇ | 
| 구 분[결의][번호] | 0 | 1.00 | 7.402300e+02 | 8.966400e+02 | 1 | 50 | 314 | 1111 | 3225 | ▇▂▁▁▁ | 
| 결의부서[코드] | 0 | 1.00 | 1.936410e+03 | 8.445700e+02 | 1400 | 1400 | 1400 | 2900 | 4120 | ▇▁▁▂▁ | 
| PJT[코드] | 78366 | 0.75 | 3.150050e+03 | 5.426600e+02 | 1100 | 3100 | 3100 | 3200 | 6100 | ▁▇▃▁▁ | 
| 유형번호[번호] | 0 | 1.00 | 2.920000e+00 | 8.500000e-01 | 1 | 3 | 3 | 3 | 8 | ▃▇▃▁▁ | 
A02_tbl <- je_tbl %>% 
       select(JENO, DR, CR) %>% 
       mutate_all(~replace(., is.na(.), 0)) %>% 
       group_by(JENO) %>% 
       summarise(DR_sum = sum(DR),
                 CR_sum = sum(CR)) %>%  
       mutate(Differ= DR_sum - CR_sum)
A02_tbl %>% 
  filter(Differ > 0)  %>% 
  nrow()[1] 0계정과목 “<< 손 익 >>”을 기준으로 재무상태표와 손익계산서를 분리한다.
cytb_raw <- read_excel("data/je/CYTB.xlsx")
cytb_tbl <- cytb_raw %>% 
  mutate(계정구분 = ifelse(str_detect(계정과목, pattern =  "<< 손          익 >>"), "손익계산서", NA)) %>% 
  fill(계정구분, .direction = "down") %>% 
  mutate(계정구분 = ifelse(is.na(계정구분), "대차대조표", 계정구분))
cytb_bs_tbl <-  cytb_tbl %>% 
  filter(계정구분 == "대차대조표")
cytb_pl_tbl <-  cytb_tbl %>% 
  filter(계정구분 == "손익계산서")cytb_bs_tbl %>% 
  reactable::reactable(filterable = TRUE, minRows = 10)cytb_pl_tbl %>% 
  reactable::reactable(filterable = TRUE, minRows = 10)시산표 Reconciliation 검증 (Trial Balance Rollforward Test) 작업을 수행하자.
cytb_raw <- read_excel("data/je/CYTB.xlsx")
cytb_raw %>% 
  skim()| Name | Piped data | 
| Number of rows | 247 | 
| Number of columns | 8 | 
| _______________________ | |
| Column type frequency: | |
| character | 2 | 
| numeric | 6 | 
| ________________________ | |
| Group variables | None | 
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace | 
|---|---|---|---|---|---|---|---|
| ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 219 | 0 | 
| 계정과목 | 0 | 1.00 | 5 | 18 | 0 | 247 | 0 | 
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist | 
|---|---|---|---|---|---|---|---|---|---|---|
| 차 변[잔 액] | 0 | 1 | 1355768424 | 5983007213 | -1309124319 | 0 | 0 | 82282798 | 62101614775 | ▇▁▁▁▁ | 
| DRSUM | 0 | 1 | 5302402886 | 24986271565 | -1309124319 | 5514068 | 87440970 | 1006240376 | 253901405182 | ▇▁▁▁▁ | 
| 차 변[당월계] | 0 | 1 | 802879793 | 3845681092 | -1362904516 | 0 | 3750000 | 51827034 | 35296046716 | ▇▁▁▁▁ | 
| 대 변[당월계] | 0 | 1 | 808683217 | 3748465986 | -2087508276 | 0 | 745250 | 49324499 | 34831325277 | ▇▁▁▁▁ | 
| CRSUM | 0 | 1 | 5079485073 | 21853632283 | -1272037660 | 0 | 41142000 | 826932148 | 198840705447 | ▇▁▁▁▁ | 
| 대 변[잔 액] | 0 | 1 | 1132850612 | 4654045455 | -1272037660 | 0 | 0 | 5944626 | 38571973779 | ▇▁▁▁▁ | 
cytb_tbl <- cytb_raw %>% 
  filter(!is.na(ACCTCD))pytb_raw <- read_excel("data/je/PYTB.xlsx")
pytb_raw %>% 
  skim()| Name | Piped data | 
| Number of rows | 250 | 
| Number of columns | 8 | 
| _______________________ | |
| Column type frequency: | |
| character | 2 | 
| numeric | 6 | 
| ________________________ | |
| Group variables | None | 
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace | 
|---|---|---|---|---|---|---|---|
| ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 222 | 0 | 
| 계정과목 | 0 | 1.00 | 6 | 18 | 0 | 250 | 0 | 
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist | 
|---|---|---|---|---|---|---|---|---|---|---|
| 차 변[잔 액] | 0 | 1 | 1375092822 | 6049647454 | -615844901 | 0 | 0 | 97636782 | 61475340718 | ▇▁▁▁▁ | 
| DRSUM | 0 | 1 | 5367913342 | 25601417912 | -615844901 | 5238137 | 81799868 | 995556416 | 262232340580 | ▇▁▁▁▁ | 
| 차 변[당월계] | 0 | 1 | 663679161 | 2950865669 | -562072440 | 0 | 1581166 | 58943939 | 28404366385 | ▇▁▁▁▁ | 
| 대 변[당월계] | 0 | 1 | 667012548 | 3037415563 | -440068365 | 0 | 1273409 | 54959574 | 28978902614 | ▇▁▁▁▁ | 
| CRSUM | 0 | 1 | 5154250951 | 22530121965 | -1272037660 | 0 | 57420116 | 866302727 | 208816742870 | ▇▁▁▁▁ | 
| 대 변[잔 액] | 0 | 1 | 1161430431 | 4671947604 | -1272037660 | 0 | 0 | 13339894 | 36756466109 | ▇▁▁▁▁ | 
pytb_tbl <- pytb_raw %>% 
  filter(!is.na(ACCTCD))cytb_fp <- cytb_tbl %>% 
  slice(1:99)
cytb_pl <- cytb_tbl %>% 
  slice(100:n())
pytb_fp <- pytb_tbl %>% 
  slice(1:103)CYTB_FP <- full_join(cytb_fp, cytb_pl, by='ACCTCD') %>% 
       mutate_all(~replace(., is.na(.), 0)) %>% 
       mutate(balance = (DRSUM.x - CRSUM.x) -(DRSUM.y - CRSUM.y)) %>% 
       select(ACCTCD, balance)
CYTB_PL <- cytb_pl %>% 
       mutate(balance = (DRSUM - CRSUM)) %>% 
       select(ACCTCD, balance)
CYTB_balance <- bind_rows(CYTB_FP, CYTB_PL)A03 <- je_tbl %>% 
  select(ACCTCD, DR, CR) %>% 
  mutate_all(~replace(., is.na(.), 0)) %>% 
  group_by(ACCTCD) %>% 
  summarise(DR_sum=sum(DR),
             CR_sum=sum(CR)) %>% 
  ungroup() %>% 
  # Join 할 때 자료형 오류 때문에 자료형 맞춰주기
  mutate(ACCTCD = as.character(ACCTCD)) 
A03 <- left_join(A03, CYTB_balance, by = 'ACCTCD')
skim(A03)| Name | A03 | 
| Number of rows | 331 | 
| Number of columns | 4 | 
| _______________________ | |
| Column type frequency: | |
| character | 1 | 
| numeric | 3 | 
| ________________________ | |
| Group variables | None | 
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace | 
|---|---|---|---|---|---|---|---|
| ACCTCD | 0 | 1 | 5 | 5 | 0 | 211 | 0 | 
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist | 
|---|---|---|---|---|---|---|---|---|---|---|
| DR_sum | 0 | 1.00 | 1025062139 | 5667982621 | -1309124319 | 1406514 | 30173284 | 247611343 | 90863358102 | ▇▁▁▁▁ | 
| CR_sum | 0 | 1.00 | 1036975033 | 5754502189 | -1002727743 | 0 | 7804962 | 112875589 | 90172042718 | ▇▁▁▁▁ | 
| balance | 5 | 0.98 | -1250837 | 2542420174 | -21283534544 | -8119241 | 0 | 15937363 | 21283534544 | ▁▁▇▁▁ | 
# table(is.na(A03))
# sapply(A03, function(x) sum(is.na(x)))
    
A03 <- A03 %>% 
  mutate_all(~replace(.,is.na(.), 0)) %>% 
  mutate(Differ = (DR_sum - CR_sum - balance))
A03 %>% 
  filter(Differ > 0) %>% 
  nrow()[1] 80# A03[A03$Differ > 0, ] Corr_Acc <- '40401'
B09_main <- je_tbl  %>% 
  filter(ACCTCD == Corr_Acc) %>% 
  select(JENO, ACCTCD)
B09_Corr <- je_tbl %>% 
  select(JENO, ACCTCD)
B09 <- semi_join(B09_Corr, B09_main, by = 'JENO')
B09 <- B09  %>% 
  filter(!is.na(ACCTCD)) %>% 
  count(ACCTCD)
B09# A tibble: 8 x 2
  ACCTCD     n
   <dbl> <int>
1  10800 57348
2  11300    66
3  25500 57216
4  25900    75
5  26900     2
6  40100    44
7  40401 57419
8  54100     1B09_name <- je_tbl  %>% 
  select(ACCTCD, ACCT_NM) %>% 
  distinct()    
B09 <- left_join(B09, B09_name, by = 'ACCTCD') 
B09# A tibble: 8 x 3
  ACCTCD     n ACCT_NM               
   <dbl> <int> <chr>                 
1  10800 57348 외상매출금            
2  11300    66 진행률미수금          
3  25500 57216 부가세예수금          
4  25900    75 선수금                
5  26900     2 프로젝트손실충당부채  
6  40100    44 상품매출              
7  40401 57419 제품매출              
8  54100     1 프로젝트손실전입액(제)A02_tbl %>% write_csv("data/je/A02_tbl.csv")
A03 %>% write_csv("data/je/A03.csv")
B09 %>% write_csv("data/je/B09.csv")데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com