R | Journal Entry Test | Fast track 블로그에 게시된 형 회계사님 데이터를 참고한다.
library(tidyverse)
library(readxl)
<- read_csv("data/je/je.csv", locale=locale('ko',encoding='euc-kr'))
je_raw
<- je_raw %>%
je_tbl mutate(JEDATE = lubridate::ymd(JEDATE)) %>%
mutate(요일 = lubridate::wday(JEDATE, label = TRUE))
library(skimr)
skim(je_tbl)
Name | je_tbl |
Number of rows | 309043 |
Number of columns | 23 |
_______________________ | |
Column type frequency: | |
character | 9 |
Date | 1 |
factor | 1 |
numeric | 12 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
ACCT_NM | 0 | 1.00 | 2 | 16 | 0 | 211 | 0 |
거래처[코드] | 2172 | 0.99 | 5 | 5 | 0 | 5909 | 0 |
결의사원[코드] | 0 | 1.00 | 10 | 10 | 0 | 18 | 0 |
유형번호[유형명] | 0 | 1.00 | 4 | 4 | 0 | 5 | 0 |
증빙구분[구분명] | 222980 | 0.28 | 2 | 12 | 0 | 9 | 0 |
INSERT_ID | 178226 | 0.42 | 10 | 10 | 0 | 17 | 0 |
입력일자 | 178226 | 0.42 | 21 | 22 | 0 | 20885 | 0 |
MODIFY_ID | 772 | 1.00 | 10 | 10 | 0 | 10 | 0 |
MODIFY_DT | 772 | 1.00 | 21 | 22 | 0 | 13253 | 0 |
Variable type: Date
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
JEDATE | 0 | 1 | 2017-01-01 | 2017-12-31 | 2017-07-10 | 326 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
요일 | 0 | 1 | TRUE | 7 | Tue: 76543, Mon: 65055, Wed: 53221, Thu: 50741 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
구 분[기표][번호] | 0 | 1.00 | 7.468400e+02 | 8.968800e+02 | 1 | 57 | 321 | 1135 | 3236 | ▇▂▁▁▁ |
JENO | 0 | 1.00 | 6.520718e+10 | 8.598464e+10 | 201701011 | 2017092610 | 20170616160 | 201702221968 | 201712311321 | ▇▁▁▁▃ |
LN_SQ | 0 | 1.00 | 6.761000e+01 | 1.906500e+02 | 1 | 1 | 2 | 4 | 997 | ▇▁▁▁▁ |
DRCR_FG | 0 | 1.00 | 3.670000e+00 | 4.800000e-01 | 1 | 3 | 4 | 4 | 4 | ▁▁▁▃▇ |
ACCTCD | 0 | 1.00 | 2.434893e+04 | 1.552477e+04 | 10100 | 10800 | 25301 | 40401 | 99800 | ▇▂▁▁▁ |
DR | 207399 | 0.33 | 2.856139e+06 | 5.391426e+07 | -1000000000 | 55000 | 77000 | 200000 | 4483053872 | ▇▁▁▁▁ |
CR | 101644 | 0.67 | 1.399763e+06 | 3.646970e+07 | -1777760000 | 9000 | 55000 | 99000 | 4483053872 | ▁▇▁▁▁ |
구 분[결의][년/월/일] | 0 | 1.00 | 2.017068e+07 | 3.454500e+02 | 20170101 | 20170406 | 20170710 | 20171011 | 20171231 | ▇▆▆▆▇ |
구 분[결의][번호] | 0 | 1.00 | 7.402300e+02 | 8.966400e+02 | 1 | 50 | 314 | 1111 | 3225 | ▇▂▁▁▁ |
결의부서[코드] | 0 | 1.00 | 1.936410e+03 | 8.445700e+02 | 1400 | 1400 | 1400 | 2900 | 4120 | ▇▁▁▂▁ |
PJT[코드] | 78366 | 0.75 | 3.150050e+03 | 5.426600e+02 | 1100 | 3100 | 3100 | 3200 | 6100 | ▁▇▃▁▁ |
유형번호[번호] | 0 | 1.00 | 2.920000e+00 | 8.500000e-01 | 1 | 3 | 3 | 3 | 8 | ▃▇▃▁▁ |
<- je_tbl %>%
A02_tbl select(JENO, DR, CR) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
group_by(JENO) %>%
summarise(DR_sum = sum(DR),
CR_sum = sum(CR)) %>%
mutate(Differ= DR_sum - CR_sum)
%>%
A02_tbl filter(Differ > 0) %>%
nrow()
[1] 0
계정과목
“<< 손 익 >>”을 기준으로 재무상태표와 손익계산서를 분리한다.
<- read_excel("data/je/CYTB.xlsx")
cytb_raw
<- cytb_raw %>%
cytb_tbl mutate(계정구분 = ifelse(str_detect(계정과목, pattern = "<< 손 익 >>"), "손익계산서", NA)) %>%
fill(계정구분, .direction = "down") %>%
mutate(계정구분 = ifelse(is.na(계정구분), "대차대조표", 계정구분))
<- cytb_tbl %>%
cytb_bs_tbl filter(계정구분 == "대차대조표")
<- cytb_tbl %>%
cytb_pl_tbl filter(계정구분 == "손익계산서")
%>%
cytb_bs_tbl ::reactable(filterable = TRUE, minRows = 10) reactable
%>%
cytb_pl_tbl ::reactable(filterable = TRUE, minRows = 10) reactable
시산표 Reconciliation 검증 (Trial Balance Rollforward Test) 작업을 수행하자.
<- read_excel("data/je/CYTB.xlsx")
cytb_raw
%>%
cytb_raw skim()
Name | Piped data |
Number of rows | 247 |
Number of columns | 8 |
_______________________ | |
Column type frequency: | |
character | 2 |
numeric | 6 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 219 | 0 |
계정과목 | 0 | 1.00 | 5 | 18 | 0 | 247 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
차 변[잔 액] | 0 | 1 | 1355768424 | 5983007213 | -1309124319 | 0 | 0 | 82282798 | 62101614775 | ▇▁▁▁▁ |
DRSUM | 0 | 1 | 5302402886 | 24986271565 | -1309124319 | 5514068 | 87440970 | 1006240376 | 253901405182 | ▇▁▁▁▁ |
차 변[당월계] | 0 | 1 | 802879793 | 3845681092 | -1362904516 | 0 | 3750000 | 51827034 | 35296046716 | ▇▁▁▁▁ |
대 변[당월계] | 0 | 1 | 808683217 | 3748465986 | -2087508276 | 0 | 745250 | 49324499 | 34831325277 | ▇▁▁▁▁ |
CRSUM | 0 | 1 | 5079485073 | 21853632283 | -1272037660 | 0 | 41142000 | 826932148 | 198840705447 | ▇▁▁▁▁ |
대 변[잔 액] | 0 | 1 | 1132850612 | 4654045455 | -1272037660 | 0 | 0 | 5944626 | 38571973779 | ▇▁▁▁▁ |
<- cytb_raw %>%
cytb_tbl filter(!is.na(ACCTCD))
<- read_excel("data/je/PYTB.xlsx")
pytb_raw
%>%
pytb_raw skim()
Name | Piped data |
Number of rows | 250 |
Number of columns | 8 |
_______________________ | |
Column type frequency: | |
character | 2 |
numeric | 6 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 222 | 0 |
계정과목 | 0 | 1.00 | 6 | 18 | 0 | 250 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
차 변[잔 액] | 0 | 1 | 1375092822 | 6049647454 | -615844901 | 0 | 0 | 97636782 | 61475340718 | ▇▁▁▁▁ |
DRSUM | 0 | 1 | 5367913342 | 25601417912 | -615844901 | 5238137 | 81799868 | 995556416 | 262232340580 | ▇▁▁▁▁ |
차 변[당월계] | 0 | 1 | 663679161 | 2950865669 | -562072440 | 0 | 1581166 | 58943939 | 28404366385 | ▇▁▁▁▁ |
대 변[당월계] | 0 | 1 | 667012548 | 3037415563 | -440068365 | 0 | 1273409 | 54959574 | 28978902614 | ▇▁▁▁▁ |
CRSUM | 0 | 1 | 5154250951 | 22530121965 | -1272037660 | 0 | 57420116 | 866302727 | 208816742870 | ▇▁▁▁▁ |
대 변[잔 액] | 0 | 1 | 1161430431 | 4671947604 | -1272037660 | 0 | 0 | 13339894 | 36756466109 | ▇▁▁▁▁ |
<- pytb_raw %>%
pytb_tbl filter(!is.na(ACCTCD))
<- cytb_tbl %>%
cytb_fp slice(1:99)
<- cytb_tbl %>%
cytb_pl slice(100:n())
<- pytb_tbl %>%
pytb_fp slice(1:103)
<- full_join(cytb_fp, cytb_pl, by='ACCTCD') %>%
CYTB_FP mutate_all(~replace(., is.na(.), 0)) %>%
mutate(balance = (DRSUM.x - CRSUM.x) -(DRSUM.y - CRSUM.y)) %>%
select(ACCTCD, balance)
<- cytb_pl %>%
CYTB_PL mutate(balance = (DRSUM - CRSUM)) %>%
select(ACCTCD, balance)
<- bind_rows(CYTB_FP, CYTB_PL) CYTB_balance
<- je_tbl %>%
A03 select(ACCTCD, DR, CR) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
group_by(ACCTCD) %>%
summarise(DR_sum=sum(DR),
CR_sum=sum(CR)) %>%
ungroup() %>%
# Join 할 때 자료형 오류 때문에 자료형 맞춰주기
mutate(ACCTCD = as.character(ACCTCD))
<- left_join(A03, CYTB_balance, by = 'ACCTCD')
A03
skim(A03)
Name | A03 |
Number of rows | 331 |
Number of columns | 4 |
_______________________ | |
Column type frequency: | |
character | 1 |
numeric | 3 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
ACCTCD | 0 | 1 | 5 | 5 | 0 | 211 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
DR_sum | 0 | 1.00 | 1025062139 | 5667982621 | -1309124319 | 1406514 | 30173284 | 247611343 | 90863358102 | ▇▁▁▁▁ |
CR_sum | 0 | 1.00 | 1036975033 | 5754502189 | -1002727743 | 0 | 7804962 | 112875589 | 90172042718 | ▇▁▁▁▁ |
balance | 5 | 0.98 | -1250837 | 2542420174 | -21283534544 | -8119241 | 0 | 15937363 | 21283534544 | ▁▁▇▁▁ |
# table(is.na(A03))
# sapply(A03, function(x) sum(is.na(x)))
<- A03 %>%
A03 mutate_all(~replace(.,is.na(.), 0)) %>%
mutate(Differ = (DR_sum - CR_sum - balance))
%>%
A03 filter(Differ > 0) %>%
nrow()
[1] 80
# A03[A03$Differ > 0, ]
<- '40401'
Corr_Acc
<- je_tbl %>%
B09_main filter(ACCTCD == Corr_Acc) %>%
select(JENO, ACCTCD)
<- je_tbl %>%
B09_Corr select(JENO, ACCTCD)
<- semi_join(B09_Corr, B09_main, by = 'JENO')
B09
<- B09 %>%
B09 filter(!is.na(ACCTCD)) %>%
count(ACCTCD)
B09
# A tibble: 8 x 2
ACCTCD n
<dbl> <int>
1 10800 57348
2 11300 66
3 25500 57216
4 25900 75
5 26900 2
6 40100 44
7 40401 57419
8 54100 1
<- je_tbl %>%
B09_name select(ACCTCD, ACCT_NM) %>%
distinct()
<- left_join(B09, B09_name, by = 'ACCTCD')
B09
B09
# A tibble: 8 x 3
ACCTCD n ACCT_NM
<dbl> <int> <chr>
1 10800 57348 외상매출금
2 11300 66 진행률미수금
3 25500 57216 부가세예수금
4 25900 75 선수금
5 26900 2 프로젝트손실충당부채
6 40100 44 상품매출
7 40401 57419 제품매출
8 54100 1 프로젝트손실전입액(제)
%>% write_csv("data/je/A02_tbl.csv")
A02_tbl %>% write_csv("data/je/A03.csv")
A03 %>% write_csv("data/je/B09.csv") B09
데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com