1 데이터셋 1

R | Journal Entry Test | Fast track 블로그에 게시된 형 회계사님 데이터를 참고한다.

library(tidyverse)
library(readxl)

je_raw <- read_csv("data/je/je.csv", locale=locale('ko',encoding='euc-kr'))

je_tbl <- je_raw %>% 
  mutate(JEDATE = lubridate::ymd(JEDATE)) %>% 
  mutate(요일 = lubridate::wday(JEDATE, label = TRUE)) 

2 Data Integrity Test

library(skimr)

skim(je_tbl)
Data summary
Name je_tbl
Number of rows 309043
Number of columns 23
_______________________
Column type frequency:
character 9
Date 1
factor 1
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ACCT_NM 0 1.00 2 16 0 211 0
거래처[코드] 2172 0.99 5 5 0 5909 0
결의사원[코드] 0 1.00 10 10 0 18 0
유형번호[유형명] 0 1.00 4 4 0 5 0
증빙구분[구분명] 222980 0.28 2 12 0 9 0
INSERT_ID 178226 0.42 10 10 0 17 0
입력일자 178226 0.42 21 22 0 20885 0
MODIFY_ID 772 1.00 10 10 0 10 0
MODIFY_DT 772 1.00 21 22 0 13253 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
JEDATE 0 1 2017-01-01 2017-12-31 2017-07-10 326

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
요일 0 1 TRUE 7 Tue: 76543, Mon: 65055, Wed: 53221, Thu: 50741

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
구 분[기표][번호] 0 1.00 7.468400e+02 8.968800e+02 1 57 321 1135 3236 ▇▂▁▁▁
JENO 0 1.00 6.520718e+10 8.598464e+10 201701011 2017092610 20170616160 201702221968 201712311321 ▇▁▁▁▃
LN_SQ 0 1.00 6.761000e+01 1.906500e+02 1 1 2 4 997 ▇▁▁▁▁
DRCR_FG 0 1.00 3.670000e+00 4.800000e-01 1 3 4 4 4 ▁▁▁▃▇
ACCTCD 0 1.00 2.434893e+04 1.552477e+04 10100 10800 25301 40401 99800 ▇▂▁▁▁
DR 207399 0.33 2.856139e+06 5.391426e+07 -1000000000 55000 77000 200000 4483053872 ▇▁▁▁▁
CR 101644 0.67 1.399763e+06 3.646970e+07 -1777760000 9000 55000 99000 4483053872 ▁▇▁▁▁
구 분[결의][년/월/일] 0 1.00 2.017068e+07 3.454500e+02 20170101 20170406 20170710 20171011 20171231 ▇▆▆▆▇
구 분[결의][번호] 0 1.00 7.402300e+02 8.966400e+02 1 50 314 1111 3225 ▇▂▁▁▁
결의부서[코드] 0 1.00 1.936410e+03 8.445700e+02 1400 1400 1400 2900 4120 ▇▁▁▂▁
PJT[코드] 78366 0.75 3.150050e+03 5.426600e+02 1100 3100 3100 3200 6100 ▁▇▃▁▁
유형번호[번호] 0 1.00 2.920000e+00 8.500000e-01 1 3 3 3 8 ▃▇▃▁▁

3 전표번호 별 차대변 일치검증

A02_tbl <- je_tbl %>% 
       select(JENO, DR, CR) %>% 
       mutate_all(~replace(., is.na(.), 0)) %>% 
       group_by(JENO) %>% 
       summarise(DR_sum = sum(DR),
                 CR_sum = sum(CR)) %>%  
       mutate(Differ= DR_sum - CR_sum)

A02_tbl %>% 
  filter(Differ > 0)  %>% 
  nrow()
[1] 0

4 대차대조표와 손익계산서 분리

계정과목 “<< 손 익 >>”을 기준으로 재무상태표와 손익계산서를 분리한다.

cytb_raw <- read_excel("data/je/CYTB.xlsx")

cytb_tbl <- cytb_raw %>% 
  mutate(계정구분 = ifelse(str_detect(계정과목, pattern =  "<< 손          익 >>"), "손익계산서", NA)) %>% 
  fill(계정구분, .direction = "down") %>% 
  mutate(계정구분 = ifelse(is.na(계정구분), "대차대조표", 계정구분))

cytb_bs_tbl <-  cytb_tbl %>% 
  filter(계정구분 == "대차대조표")

cytb_pl_tbl <-  cytb_tbl %>% 
  filter(계정구분 == "손익계산서")

4.1 대차대조표

cytb_bs_tbl %>% 
  reactable::reactable(filterable = TRUE, minRows = 10)

4.2 손익계산서

cytb_pl_tbl %>% 
  reactable::reactable(filterable = TRUE, minRows = 10)

5 시산표 Reconciliation 검증

시산표 Reconciliation 검증 (Trial Balance Rollforward Test) 작업을 수행하자.

5.1 당기 시산표

cytb_raw <- read_excel("data/je/CYTB.xlsx")

cytb_raw %>% 
  skim()
Data summary
Name Piped data
Number of rows 247
Number of columns 8
_______________________
Column type frequency:
character 2
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ACCTCD 28 0.89 5 5 0 219 0
계정과목 0 1.00 5 18 0 247 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
차 변[잔 액] 0 1 1355768424 5983007213 -1309124319 0 0 82282798 62101614775 ▇▁▁▁▁
DRSUM 0 1 5302402886 24986271565 -1309124319 5514068 87440970 1006240376 253901405182 ▇▁▁▁▁
차 변[당월계] 0 1 802879793 3845681092 -1362904516 0 3750000 51827034 35296046716 ▇▁▁▁▁
대 변[당월계] 0 1 808683217 3748465986 -2087508276 0 745250 49324499 34831325277 ▇▁▁▁▁
CRSUM 0 1 5079485073 21853632283 -1272037660 0 41142000 826932148 198840705447 ▇▁▁▁▁
대 변[잔 액] 0 1 1132850612 4654045455 -1272037660 0 0 5944626 38571973779 ▇▁▁▁▁
cytb_tbl <- cytb_raw %>% 
  filter(!is.na(ACCTCD))

5.2 전기 시산표

pytb_raw <- read_excel("data/je/PYTB.xlsx")

pytb_raw %>% 
  skim()
Data summary
Name Piped data
Number of rows 250
Number of columns 8
_______________________
Column type frequency:
character 2
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ACCTCD 28 0.89 5 5 0 222 0
계정과목 0 1.00 6 18 0 250 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
차 변[잔 액] 0 1 1375092822 6049647454 -615844901 0 0 97636782 61475340718 ▇▁▁▁▁
DRSUM 0 1 5367913342 25601417912 -615844901 5238137 81799868 995556416 262232340580 ▇▁▁▁▁
차 변[당월계] 0 1 663679161 2950865669 -562072440 0 1581166 58943939 28404366385 ▇▁▁▁▁
대 변[당월계] 0 1 667012548 3037415563 -440068365 0 1273409 54959574 28978902614 ▇▁▁▁▁
CRSUM 0 1 5154250951 22530121965 -1272037660 0 57420116 866302727 208816742870 ▇▁▁▁▁
대 변[잔 액] 0 1 1161430431 4671947604 -1272037660 0 0 13339894 36756466109 ▇▁▁▁▁
pytb_tbl <- pytb_raw %>% 
  filter(!is.na(ACCTCD))

5.3 당기전기 데이터 정제

cytb_fp <- cytb_tbl %>% 
  slice(1:99)

cytb_pl <- cytb_tbl %>% 
  slice(100:n())

pytb_fp <- pytb_tbl %>% 
  slice(1:103)
CYTB_FP <- full_join(cytb_fp, cytb_pl, by='ACCTCD') %>% 
       mutate_all(~replace(., is.na(.), 0)) %>% 
       mutate(balance = (DRSUM.x - CRSUM.x) -(DRSUM.y - CRSUM.y)) %>% 
       select(ACCTCD, balance)

CYTB_PL <- cytb_pl %>% 
       mutate(balance = (DRSUM - CRSUM)) %>% 
       select(ACCTCD, balance)

CYTB_balance <- bind_rows(CYTB_FP, CYTB_PL)
A03 <- je_tbl %>% 
  select(ACCTCD, DR, CR) %>% 
  mutate_all(~replace(., is.na(.), 0)) %>% 
  group_by(ACCTCD) %>% 
  summarise(DR_sum=sum(DR),
             CR_sum=sum(CR)) %>% 
  ungroup() %>% 
  # Join 할 때 자료형 오류 때문에 자료형 맞춰주기
  mutate(ACCTCD = as.character(ACCTCD)) 

A03 <- left_join(A03, CYTB_balance, by = 'ACCTCD')

skim(A03)
Data summary
Name A03
Number of rows 331
Number of columns 4
_______________________
Column type frequency:
character 1
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ACCTCD 0 1 5 5 0 211 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
DR_sum 0 1.00 1025062139 5667982621 -1309124319 1406514 30173284 247611343 90863358102 ▇▁▁▁▁
CR_sum 0 1.00 1036975033 5754502189 -1002727743 0 7804962 112875589 90172042718 ▇▁▁▁▁
balance 5 0.98 -1250837 2542420174 -21283534544 -8119241 0 15937363 21283534544 ▁▁▇▁▁
# table(is.na(A03))
# sapply(A03, function(x) sum(is.na(x)))
    
A03 <- A03 %>% 
  mutate_all(~replace(.,is.na(.), 0)) %>% 
  mutate(Differ = (DR_sum - CR_sum - balance))

A03 %>% 
  filter(Differ > 0) %>% 
  nrow()
[1] 80
# A03[A03$Differ > 0, ] 

5.4 B01 매출의 상대계정분석

Corr_Acc <- '40401'

B09_main <- je_tbl  %>% 
  filter(ACCTCD == Corr_Acc) %>% 
  select(JENO, ACCTCD)

B09_Corr <- je_tbl %>% 
  select(JENO, ACCTCD)

B09 <- semi_join(B09_Corr, B09_main, by = 'JENO')

B09 <- B09  %>% 
  filter(!is.na(ACCTCD)) %>% 
  count(ACCTCD)

B09
# A tibble: 8 x 2
  ACCTCD     n
   <dbl> <int>
1  10800 57348
2  11300    66
3  25500 57216
4  25900    75
5  26900     2
6  40100    44
7  40401 57419
8  54100     1
B09_name <- je_tbl  %>% 
  select(ACCTCD, ACCT_NM) %>% 
  distinct()    

B09 <- left_join(B09, B09_name, by = 'ACCTCD') 

B09
# A tibble: 8 x 3
  ACCTCD     n ACCT_NM               
   <dbl> <int> <chr>                 
1  10800 57348 외상매출금            
2  11300    66 진행률미수금          
3  25500 57216 부가세예수금          
4  25900    75 선수금                
5  26900     2 프로젝트손실충당부채  
6  40100    44 상품매출              
7  40401 57419 제품매출              
8  54100     1 프로젝트손실전입액(제)

6 작업 결과 저장

A02_tbl %>% write_csv("data/je/A02_tbl.csv")
A03 %>% write_csv("data/je/A03.csv")
B09 %>% write_csv("data/je/B09.csv")
 

데이터 과학자 이광춘 저작

kwangchun.lee.7@gmail.com