R | Journal Entry Test | Fast track 블로그에 게시된 형 회계사님 데이터를 참고한다.
library(tidyverse)
library(readxl)
je_raw <- read_csv("data/je/je.csv", locale=locale('ko',encoding='euc-kr'))
je_tbl <- je_raw %>%
mutate(JEDATE = lubridate::ymd(JEDATE)) %>%
mutate(요일 = lubridate::wday(JEDATE, label = TRUE)) library(skimr)
skim(je_tbl)| Name | je_tbl |
| Number of rows | 309043 |
| Number of columns | 23 |
| _______________________ | |
| Column type frequency: | |
| character | 9 |
| Date | 1 |
| factor | 1 |
| numeric | 12 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ACCT_NM | 0 | 1.00 | 2 | 16 | 0 | 211 | 0 |
| 거래처[코드] | 2172 | 0.99 | 5 | 5 | 0 | 5909 | 0 |
| 결의사원[코드] | 0 | 1.00 | 10 | 10 | 0 | 18 | 0 |
| 유형번호[유형명] | 0 | 1.00 | 4 | 4 | 0 | 5 | 0 |
| 증빙구분[구분명] | 222980 | 0.28 | 2 | 12 | 0 | 9 | 0 |
| INSERT_ID | 178226 | 0.42 | 10 | 10 | 0 | 17 | 0 |
| 입력일자 | 178226 | 0.42 | 21 | 22 | 0 | 20885 | 0 |
| MODIFY_ID | 772 | 1.00 | 10 | 10 | 0 | 10 | 0 |
| MODIFY_DT | 772 | 1.00 | 21 | 22 | 0 | 13253 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| JEDATE | 0 | 1 | 2017-01-01 | 2017-12-31 | 2017-07-10 | 326 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| 요일 | 0 | 1 | TRUE | 7 | Tue: 76543, Mon: 65055, Wed: 53221, Thu: 50741 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 구 분[기표][번호] | 0 | 1.00 | 7.468400e+02 | 8.968800e+02 | 1 | 57 | 321 | 1135 | 3236 | ▇▂▁▁▁ |
| JENO | 0 | 1.00 | 6.520718e+10 | 8.598464e+10 | 201701011 | 2017092610 | 20170616160 | 201702221968 | 201712311321 | ▇▁▁▁▃ |
| LN_SQ | 0 | 1.00 | 6.761000e+01 | 1.906500e+02 | 1 | 1 | 2 | 4 | 997 | ▇▁▁▁▁ |
| DRCR_FG | 0 | 1.00 | 3.670000e+00 | 4.800000e-01 | 1 | 3 | 4 | 4 | 4 | ▁▁▁▃▇ |
| ACCTCD | 0 | 1.00 | 2.434893e+04 | 1.552477e+04 | 10100 | 10800 | 25301 | 40401 | 99800 | ▇▂▁▁▁ |
| DR | 207399 | 0.33 | 2.856139e+06 | 5.391426e+07 | -1000000000 | 55000 | 77000 | 200000 | 4483053872 | ▇▁▁▁▁ |
| CR | 101644 | 0.67 | 1.399763e+06 | 3.646970e+07 | -1777760000 | 9000 | 55000 | 99000 | 4483053872 | ▁▇▁▁▁ |
| 구 분[결의][년/월/일] | 0 | 1.00 | 2.017068e+07 | 3.454500e+02 | 20170101 | 20170406 | 20170710 | 20171011 | 20171231 | ▇▆▆▆▇ |
| 구 분[결의][번호] | 0 | 1.00 | 7.402300e+02 | 8.966400e+02 | 1 | 50 | 314 | 1111 | 3225 | ▇▂▁▁▁ |
| 결의부서[코드] | 0 | 1.00 | 1.936410e+03 | 8.445700e+02 | 1400 | 1400 | 1400 | 2900 | 4120 | ▇▁▁▂▁ |
| PJT[코드] | 78366 | 0.75 | 3.150050e+03 | 5.426600e+02 | 1100 | 3100 | 3100 | 3200 | 6100 | ▁▇▃▁▁ |
| 유형번호[번호] | 0 | 1.00 | 2.920000e+00 | 8.500000e-01 | 1 | 3 | 3 | 3 | 8 | ▃▇▃▁▁ |
A02_tbl <- je_tbl %>%
select(JENO, DR, CR) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
group_by(JENO) %>%
summarise(DR_sum = sum(DR),
CR_sum = sum(CR)) %>%
mutate(Differ= DR_sum - CR_sum)
A02_tbl %>%
filter(Differ > 0) %>%
nrow()[1] 0
계정과목 “<< 손 익 >>”을 기준으로 재무상태표와 손익계산서를 분리한다.
cytb_raw <- read_excel("data/je/CYTB.xlsx")
cytb_tbl <- cytb_raw %>%
mutate(계정구분 = ifelse(str_detect(계정과목, pattern = "<< 손 익 >>"), "손익계산서", NA)) %>%
fill(계정구분, .direction = "down") %>%
mutate(계정구분 = ifelse(is.na(계정구분), "대차대조표", 계정구분))
cytb_bs_tbl <- cytb_tbl %>%
filter(계정구분 == "대차대조표")
cytb_pl_tbl <- cytb_tbl %>%
filter(계정구분 == "손익계산서")cytb_bs_tbl %>%
reactable::reactable(filterable = TRUE, minRows = 10)cytb_pl_tbl %>%
reactable::reactable(filterable = TRUE, minRows = 10)시산표 Reconciliation 검증 (Trial Balance Rollforward Test) 작업을 수행하자.
cytb_raw <- read_excel("data/je/CYTB.xlsx")
cytb_raw %>%
skim()| Name | Piped data |
| Number of rows | 247 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 219 | 0 |
| 계정과목 | 0 | 1.00 | 5 | 18 | 0 | 247 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 차 변[잔 액] | 0 | 1 | 1355768424 | 5983007213 | -1309124319 | 0 | 0 | 82282798 | 62101614775 | ▇▁▁▁▁ |
| DRSUM | 0 | 1 | 5302402886 | 24986271565 | -1309124319 | 5514068 | 87440970 | 1006240376 | 253901405182 | ▇▁▁▁▁ |
| 차 변[당월계] | 0 | 1 | 802879793 | 3845681092 | -1362904516 | 0 | 3750000 | 51827034 | 35296046716 | ▇▁▁▁▁ |
| 대 변[당월계] | 0 | 1 | 808683217 | 3748465986 | -2087508276 | 0 | 745250 | 49324499 | 34831325277 | ▇▁▁▁▁ |
| CRSUM | 0 | 1 | 5079485073 | 21853632283 | -1272037660 | 0 | 41142000 | 826932148 | 198840705447 | ▇▁▁▁▁ |
| 대 변[잔 액] | 0 | 1 | 1132850612 | 4654045455 | -1272037660 | 0 | 0 | 5944626 | 38571973779 | ▇▁▁▁▁ |
cytb_tbl <- cytb_raw %>%
filter(!is.na(ACCTCD))pytb_raw <- read_excel("data/je/PYTB.xlsx")
pytb_raw %>%
skim()| Name | Piped data |
| Number of rows | 250 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ACCTCD | 28 | 0.89 | 5 | 5 | 0 | 222 | 0 |
| 계정과목 | 0 | 1.00 | 6 | 18 | 0 | 250 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| 차 변[잔 액] | 0 | 1 | 1375092822 | 6049647454 | -615844901 | 0 | 0 | 97636782 | 61475340718 | ▇▁▁▁▁ |
| DRSUM | 0 | 1 | 5367913342 | 25601417912 | -615844901 | 5238137 | 81799868 | 995556416 | 262232340580 | ▇▁▁▁▁ |
| 차 변[당월계] | 0 | 1 | 663679161 | 2950865669 | -562072440 | 0 | 1581166 | 58943939 | 28404366385 | ▇▁▁▁▁ |
| 대 변[당월계] | 0 | 1 | 667012548 | 3037415563 | -440068365 | 0 | 1273409 | 54959574 | 28978902614 | ▇▁▁▁▁ |
| CRSUM | 0 | 1 | 5154250951 | 22530121965 | -1272037660 | 0 | 57420116 | 866302727 | 208816742870 | ▇▁▁▁▁ |
| 대 변[잔 액] | 0 | 1 | 1161430431 | 4671947604 | -1272037660 | 0 | 0 | 13339894 | 36756466109 | ▇▁▁▁▁ |
pytb_tbl <- pytb_raw %>%
filter(!is.na(ACCTCD))cytb_fp <- cytb_tbl %>%
slice(1:99)
cytb_pl <- cytb_tbl %>%
slice(100:n())
pytb_fp <- pytb_tbl %>%
slice(1:103)CYTB_FP <- full_join(cytb_fp, cytb_pl, by='ACCTCD') %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(balance = (DRSUM.x - CRSUM.x) -(DRSUM.y - CRSUM.y)) %>%
select(ACCTCD, balance)
CYTB_PL <- cytb_pl %>%
mutate(balance = (DRSUM - CRSUM)) %>%
select(ACCTCD, balance)
CYTB_balance <- bind_rows(CYTB_FP, CYTB_PL)A03 <- je_tbl %>%
select(ACCTCD, DR, CR) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
group_by(ACCTCD) %>%
summarise(DR_sum=sum(DR),
CR_sum=sum(CR)) %>%
ungroup() %>%
# Join 할 때 자료형 오류 때문에 자료형 맞춰주기
mutate(ACCTCD = as.character(ACCTCD))
A03 <- left_join(A03, CYTB_balance, by = 'ACCTCD')
skim(A03)| Name | A03 |
| Number of rows | 331 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ACCTCD | 0 | 1 | 5 | 5 | 0 | 211 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| DR_sum | 0 | 1.00 | 1025062139 | 5667982621 | -1309124319 | 1406514 | 30173284 | 247611343 | 90863358102 | ▇▁▁▁▁ |
| CR_sum | 0 | 1.00 | 1036975033 | 5754502189 | -1002727743 | 0 | 7804962 | 112875589 | 90172042718 | ▇▁▁▁▁ |
| balance | 5 | 0.98 | -1250837 | 2542420174 | -21283534544 | -8119241 | 0 | 15937363 | 21283534544 | ▁▁▇▁▁ |
# table(is.na(A03))
# sapply(A03, function(x) sum(is.na(x)))
A03 <- A03 %>%
mutate_all(~replace(.,is.na(.), 0)) %>%
mutate(Differ = (DR_sum - CR_sum - balance))
A03 %>%
filter(Differ > 0) %>%
nrow()[1] 80
# A03[A03$Differ > 0, ] Corr_Acc <- '40401'
B09_main <- je_tbl %>%
filter(ACCTCD == Corr_Acc) %>%
select(JENO, ACCTCD)
B09_Corr <- je_tbl %>%
select(JENO, ACCTCD)
B09 <- semi_join(B09_Corr, B09_main, by = 'JENO')
B09 <- B09 %>%
filter(!is.na(ACCTCD)) %>%
count(ACCTCD)
B09# A tibble: 8 x 2
ACCTCD n
<dbl> <int>
1 10800 57348
2 11300 66
3 25500 57216
4 25900 75
5 26900 2
6 40100 44
7 40401 57419
8 54100 1
B09_name <- je_tbl %>%
select(ACCTCD, ACCT_NM) %>%
distinct()
B09 <- left_join(B09, B09_name, by = 'ACCTCD')
B09# A tibble: 8 x 3
ACCTCD n ACCT_NM
<dbl> <int> <chr>
1 10800 57348 외상매출금
2 11300 66 진행률미수금
3 25500 57216 부가세예수금
4 25900 75 선수금
5 26900 2 프로젝트손실충당부채
6 40100 44 상품매출
7 40401 57419 제품매출
8 54100 1 프로젝트손실전입액(제)
A02_tbl %>% write_csv("data/je/A02_tbl.csv")
A03 %>% write_csv("data/je/A03.csv")
B09 %>% write_csv("data/je/B09.csv")데이터 과학자 이광춘 저작
kwangchun.lee.7@gmail.com