1 의존성 문제 해결¹

RBERT를 실행하려면 tensorflow가 필요하므로, 먼저 다음과 같은 선행 작업을 수행한다.

  • 파이썬 3 설치
    • brew install python3
python --version
Python 3.6.10
  • virtualenv 설치
    • brew install virtualenv
virtualenv --version

1.1 reticulate 설정

# Print the Python configuration reticulate is using (interpreter, libpython, numpy).
reticulate::py_config()
python:         /Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate/bin/python
libpython:      /Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate/lib/libpython3.6m.dylib
pythonhome:     /Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate:/Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate
version:        3.6.10 | packaged by conda-forge | (default, Apr 24 2020, 16:27:41)  [GCC Clang 9.0.1 ]
numpy:          /Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate/lib/python3.6/site-packages/numpy
numpy_version:  1.18.5

NOTE: Python version was forced by RETICULATE_PYTHON
# Point reticulate at the Python interpreter of the "r-reticulate" miniconda
# environment via the RETICULATE_PYTHON environment variable.
# NOTE(review): the path is specific to one user's machine; adjust before reuse.
Sys.setenv(RETICULATE_PYTHON = "/Users/kwangchunlee/Library/r-miniconda/envs/r-reticulate/bin/python")
library(tensorflow)
library(keras)

# Smoke test: build a constant string tensor to confirm TensorFlow is usable from R.
tf$constant("Hellow Tensorflow")
Tensor("Const:0", shape=(), dtype=string)

2 RBERT 설치

# One-time setup, left commented out so it is not re-run on every execution:
# install TensorFlow 1.13.1 and the RBERT package from GitHub.
# tensorflow::install_tensorflow(version = "1.13.1")
# devtools::install_github("jonathanbratt/RBERT", build_vignettes = TRUE)

library(RBERT)
library(tidyverse)

# Fetch the pretrained "bert_base_uncased" checkpoint. The returned value is
# reused below as `ckpt_dir` (presumably the local checkpoint directory).
BERT_PRETRAINED_DIR <- RBERT::download_BERT_checkpoint(
  model = "bert_base_uncased"
)

# Example sentences to embed.
text_to_process <- c("Impulse is equal to the change in momentum.",
                     "Changing momentum requires an impulse.",
                     "An impulse is like a push.",
                     "Impulse is force times time.")

# Run the sentences through the checkpointed BERT model. The `$output` element
# of the result is used later as a table with index columns
# (sequence/segment/token/layer) and embedding columns V1, V2, ...
BERT_feats <- extract_features(
  examples = text_to_process,
  ckpt_dir = BERT_PRETRAINED_DIR
)

# Print the extracted features.
BERT_feats
$output
# A tibble: 144 x 773
   sequence_index segment_index token_index token layer_index     V1
            <int>         <int>       <int> <chr>       <int>  <dbl>
 1              1             1           1 [CLS]           9 -0.226
 2              1             1           2 impu…           9  0.734
 3              1             1           3 is              9 -0.426
 4              1             1           4 equal           9  1.30 
 5              1             1           5 to              9 -1.13 
 6              1             1           6 the             9 -1.42 
 7              1             1           7 chan…           9 -0.219
 8              1             1           8 in              9 -1.06 
 9              1             1           9 mome…           9  0.844
10              1             1          10 .               9 -1.23 
# … with 134 more rows, and 767 more variables: V2 <dbl>, V3 <dbl>,
#   V4 <dbl>, V5 <dbl>, V6 <dbl>, V7 <dbl>, V8 <dbl>, V9 <dbl>, V10 <dbl>,
#   V11 <dbl>, V12 <dbl>, V13 <dbl>, V14 <dbl>, V15 <dbl>, V16 <dbl>,
#   V17 <dbl>, V18 <dbl>, V19 <dbl>, V20 <dbl>, V21 <dbl>, V22 <dbl>,
#   V23 <dbl>, V24 <dbl>, V25 <dbl>, V26 <dbl>, V27 <dbl>, V28 <dbl>,
#   V29 <dbl>, V30 <dbl>, V31 <dbl>, V32 <dbl>, V33 <dbl>, V34 <dbl>,
#   V35 <dbl>, V36 <dbl>, V37 <dbl>, V38 <dbl>, V39 <dbl>, V40 <dbl>,
#   V41 <dbl>, V42 <dbl>, V43 <dbl>, V44 <dbl>, V45 <dbl>, V46 <dbl>,
#   V47 <dbl>, V48 <dbl>, V49 <dbl>, V50 <dbl>, V51 <dbl>, V52 <dbl>,
#   V53 <dbl>, V54 <dbl>, V55 <dbl>, V56 <dbl>, V57 <dbl>, V58 <dbl>,
#   V59 <dbl>, V60 <dbl>, V61 <dbl>, V62 <dbl>, V63 <dbl>, V64 <dbl>,
#   V65 <dbl>, V66 <dbl>, V67 <dbl>, V68 <dbl>, V69 <dbl>, V70 <dbl>,
#   V71 <dbl>, V72 <dbl>, V73 <dbl>, V74 <dbl>, V75 <dbl>, V76 <dbl>,
#   V77 <dbl>, V78 <dbl>, V79 <dbl>, V80 <dbl>, V81 <dbl>, V82 <dbl>,
#   V83 <dbl>, V84 <dbl>, V85 <dbl>, V86 <dbl>, V87 <dbl>, V88 <dbl>,
#   V89 <dbl>, V90 <dbl>, V91 <dbl>, V92 <dbl>, V93 <dbl>, V94 <dbl>,
#   V95 <dbl>, V96 <dbl>, V97 <dbl>, V98 <dbl>, V99 <dbl>, V100 <dbl>,
#   V101 <dbl>, …
# Embedding of the "[CLS]" token of the first sentence at layer 12:
# keep only the matching row(s), keep only the "V*" embedding columns,
# then flatten the result into a named vector.
output_vector1 <- unlist(
  dplyr::select(
    dplyr::filter(
      BERT_feats$output,
      sequence_index == 1 & token == "[CLS]" & layer_index == 12
    ),
    dplyr::starts_with("V")
  )
)
output_vector1
          V1           V2           V3           V4           V5 
-0.707382917 -0.206483752  0.069142193  0.134183094 -0.798292816 
          V6           V7           V8           V9          V10 
-0.589918792  0.013870991  0.331387162  0.606351554 -0.628228247 
         V11          V12          V13          V14          V15 
-0.565253317 -0.103267051 -0.033182636  0.288546711  0.327375531 
         V16          V17          V18          V19          V20 
 0.278905630  0.150683492  0.231732130  0.212081760 -0.128903344 
         V21          V22          V23          V24          V25 
-0.434507668  0.503891706  0.019407989  0.068769597 -0.063417733 
         V26          V27          V28          V29          V30 
-0.710997343  0.133809149 -0.590590596  0.515468717  0.224920720 
         V31          V32          V33          V34          V35 
-0.422487408  0.689604282 -1.036473036 -0.058203109  0.717361927 
         V36          V37          V38          V39          V40 
-0.466528267  0.723749161  0.256582260  0.837740123 -0.069649890 
         V41          V42          V43          V44          V45 
-0.900595725  0.245259017 -0.055503819 -0.204553992 -0.530905366 
         V46          V47          V48          V49          V50 
-0.989654839 -3.348092794 -0.253108442  0.368535548 -0.197165251 
         V51          V52          V53          V54          V55 
 0.360229731  0.006520845 -0.164570093  0.468920201 -0.128885373 
         V56          V57          V58          V59          V60 
 0.919494808 -1.137635350  0.157739967  0.166754961  0.100679576 
         V61          V62          V63          V64          V65 
 0.621348441  0.167383552 -0.258787066  0.074120916  0.143015578 
         V66          V67          V68          V69          V70 
 0.270326167  0.083169475  0.694940865 -0.367451042 -0.100173309 
         V71          V72          V73          V74          V75 
-1.380730391 -0.521223605  0.112947598  0.299328268 -0.354910493 
         V76          V77          V78          V79          V80 
-0.528601766 -0.104755118 -0.253713220 -0.676248133 -0.315365225 
         V81          V82          V83          V84          V85 
-0.225514248  1.150612354 -0.144777060 -0.240366638  0.266400695 
         V86          V87          V88          V89          V90 
 0.532273412 -0.362271100 -0.088408671 -0.417219818  1.245668769 
         V91          V92          V93          V94          V95 
-0.425260305 -0.091363519 -0.439701259  0.029550696  0.164154872 
         V96          V97          V98          V99         V100 
 0.542801976  0.275681138 -0.454286754 -0.493972778  0.293675452 
        V101         V102         V103         V104         V105 
 0.360117674  0.024289977  0.789332271 -1.071681142  0.746358871 
        V106         V107         V108         V109         V110 
-0.092599511 -0.100203291 -0.430343449  0.052853376 -1.284695148 
        V111         V112         V113         V114         V115 
 0.083292350  0.178942546 -0.520664215 -0.779974341 -0.001844665 
        V116         V117         V118         V119         V120 
 0.271038353  0.012202680 -0.697474301  0.318992972  0.277578980 
        V121         V122         V123         V124         V125 
-0.254071176  0.035755478 -0.180599779 -0.031141140 -0.241107583 
        V126         V127         V128         V129         V130 
 0.065576598  0.570737123  0.564649045  0.009803750  0.037334517 
        V131         V132         V133         V134         V135 
 0.997106969  0.726181626  0.563734233 -1.023843646 -0.812137663 
        V136         V137         V138         V139         V140 
 0.078173041 -0.140065193  0.315312684 -0.938557923 -0.276807517 
        V141         V142         V143         V144         V145 
-0.431576967 -1.089326739 -1.248966336 -0.254021406  1.626378298 
        V146         V147         V148         V149         V150 
-0.187695771  0.261305392 -0.245586365  0.610063314  0.249108031 
        V151         V152         V153         V154         V155 
 0.401066333  0.516565323 -0.013693161 -0.355235338 -0.664109766 
        V156         V157         V158         V159         V160 
-0.478989363 -0.147463650  0.895680070 -0.153740257  0.919177592 
        V161         V162         V163         V164         V165 
 0.335276872 -0.249597490  0.075410642 -0.393075824 -0.569519341 
        V166         V167         V168         V169         V170 
 0.002203170  0.711381733  0.661786973 -0.127559155 -0.005226277 
        V171         V172         V173         V174         V175 
 0.255445629 -0.349629343  1.099176288  0.516140938  0.015545040 
        V176         V177         V178         V179         V180 
-0.644326448 -0.278436720  0.229246855  0.955751657 -0.284989774 
        V181         V182         V183         V184         V185 
-0.269132584  0.619577169  0.131092399  0.190352201  0.295835644 
        V186         V187         V188         V189         V190 
-0.426219940  0.132291928 -0.203202739 -0.110414937  0.230058730 
        V191         V192         V193         V194         V195 
-0.608146667 -0.512819171  0.052936327 -0.665298104  0.124379292 
        V196         V197         V198         V199         V200 
 0.040990673  0.250830352 -1.249274850 -0.637068331  0.398070991 
        V201         V202         V203         V204         V205 
-0.078296617  0.403075099 -0.382743061 -0.094953805  0.202314615 
        V206         V207         V208         V209         V210 
 3.407658815  0.661145568  0.129825756  0.086710513  0.342992127 
        V211         V212         V213         V214         V215 
-0.628131688  0.218763605  0.481584191 -0.682936490 -0.080902457 
        V216         V217         V218         V219         V220 
-0.102310002  0.297100604  0.187507659  0.349809080  0.318771839 
        V221         V222         V223         V224         V225 
 0.422238171 -0.384314001 -0.349726051  0.259748340 -0.228251129 
        V226         V227         V228         V229         V230 
 0.950016499  0.657122076  1.292131543  0.268439531 -1.760141015 
        V231         V232         V233         V234         V235 
 0.049427547 -0.202975318  0.167167097  0.609172404 -0.719239414 
        V236         V237         V238         V239         V240 
 0.130623624 -0.078787193 -0.769529521  0.298008144 -0.877272129 
        V241         V242         V243         V244         V245 
-0.127164572  1.130953074 -0.231157020  0.052004501 -0.117016867 
        V246         V247         V248         V249         V250 
 0.333208442 -0.080097541 -1.079724550  0.244068220 -0.405482262 
        V251         V252         V253         V254         V255 
 0.183528215 -0.079978868  0.575451076 -0.225398868 -0.356659770 
        V256         V257         V258         V259         V260 
-0.484905094  0.322958231 -0.141092554  0.227844208 -0.033191480 
        V261         V262         V263         V264         V265 
 0.270280182 -0.322629660  1.201942801  0.448952347 -1.507371187 
        V266         V267         V268         V269         V270 
-0.814218462 -0.253616840  0.029891584 -0.210646927 -0.067543246 
        V271         V272         V273         V274         V275 
-0.168238252 -0.925852060 -0.037364978 -1.330722213 -0.470154703 
        V276         V277         V278         V279         V280 
-0.055983305  0.579909563  0.495906413 -0.581372857  0.238221139 
        V281         V282         V283         V284         V285 
 0.140195161  0.190930128 -0.812105000  0.711582065  0.577919126 
        V286         V287         V288         V289         V290 
 0.176466346 -0.041693211 -0.637363434  0.588006139 -0.392531097 
        V291         V292         V293         V294         V295 
-0.799684227 -0.342650831 -0.596351445  0.092095390  0.076636747 
        V296         V297         V298         V299         V300 
-0.054162875  0.427848250 -0.176791787  0.016718909 -0.515602231 
        V301         V302         V303         V304         V305 
-0.194364130 -1.055155396  0.272971392  0.255808592 -1.006324649 
        V306         V307         V308         V309         V310 
-0.284764677  0.255929291  0.039363291 -2.272679090  0.481076926 
        V311         V312         V313         V314         V315 
-0.393682629  0.183252424  0.263635665  0.456203789  0.868597806 
        V316         V317         V318         V319         V320 
-0.076859549 -1.012633085  0.387531310 -0.163993061 -1.059786797 
        V321         V322         V323         V324         V325 
-0.267635763  0.119957693  0.843602300  0.478386283  0.665618777 
        V326         V327         V328         V329         V330 
 0.219283596  0.379469305  0.658605337  0.001824733 -0.557778835 
        V331         V332         V333         V334         V335 
-0.277522713 -0.402393997  0.577685893  0.174693421 -0.453637481 
        V336         V337         V338         V339         V340 
-0.005125910 -0.126045823 -0.055197038  0.659823179 -0.339268327 
        V341         V342         V343         V344         V345 
 0.441491246 -0.784900308 -0.033801973 -0.428951144  0.034157552 
        V346         V347         V348         V349         V350 
-0.527909935  0.700323522 -0.041225873 -0.159635529  0.804156005 
        V351         V352         V353         V354         V355 
 0.448905855 -0.042217456  0.952864230 -0.027718864  0.032418042 
        V356         V357         V358         V359         V360 
 0.445918620  0.117699444  0.313362092 -0.568123519  0.879211426 
        V361         V362         V363         V364         V365 
 0.658401310 -0.682118952 -0.088479519 -0.133385450  0.902846813 
        V366         V367         V368         V369         V370 
 0.129314661  0.268315881 -0.067870572  1.052671075 -1.153124928 
        V371         V372         V373         V374         V375 
 0.219350636 -0.343799829  0.197253734 -0.922238827  0.230076939 
        V376         V377         V378         V379         V380 
-0.372171938  0.773603082  0.407868147 -0.262662321  0.742032766 
        V381         V382         V383         V384         V385 
-0.281422615 -1.342195272 -0.290935457 -0.297882676  0.059175059 
        V386         V387         V388         V389         V390 
 0.174239546  0.300090969 -0.197188124 -0.475836039 -0.498036087 
        V391         V392         V393         V394         V395 
-1.342556357  0.473824233 -0.123171553 -0.523460150  0.129574001 
        V396         V397         V398         V399         V400 
 0.265328735 -1.319045782 -0.338233858 -0.525906026  0.840900183 
        V401         V402         V403         V404         V405 
 1.213036060 -0.285932034 -1.531113625 -0.362636536  0.096930526 
        V406         V407         V408         V409         V410 
-0.510188520  0.725126326 -0.576188743 -0.262867779 -0.231129751 
        V411         V412         V413         V414         V415 
 0.266235888  0.025870640 -0.445261896  0.281145096 -0.999362528 
        V416         V417         V418         V419         V420 
 0.270357907 -0.166788563  0.554120421 -0.474761426 -0.250294477 
        V421         V422         V423         V424         V425 
 0.113824576 -0.003412619  1.290703535 -0.546085060 -0.435865551 
        V426         V427         V428         V429         V430 
 1.323131323  0.374748558  0.698527217  0.923177481  0.127986565 
        V431         V432         V433         V434         V435 
-0.040943332 -0.893372178 -0.374530703  0.416370928  0.380825102 
        V436         V437         V438         V439         V440 
-0.107731134 -0.814569414 -0.264975637 -0.497599363 -0.136085019 
        V441         V442         V443         V444         V445 
-0.755808711  0.089591026  0.121493161 -0.575258851  0.279298276 
        V446         V447         V448         V449         V450 
 0.626972854  0.500576973 -0.175842106  0.956971943  0.618582249 
        V451         V452         V453         V454         V455 
-0.570107818 -0.625137627 -0.223803297  1.165899038  0.073785231 
        V456         V457         V458         V459         V460 
-0.270937800 -0.023346353  0.873185158  0.510629773 -0.717652738 
        V461         V462         V463         V464         V465 
-0.353595078 -0.689717710  0.290874958 -0.655175805 -1.169901729 
        V466         V467         V468         V469         V470 
-0.513363779  0.101504415 -1.054045677  0.128135175  0.291974306 
        V471         V472         V473         V474         V475 
-1.397549868 -0.545363188  0.510590494  0.485544264  0.067004114 
        V476         V477         V478         V479         V480 
-0.355048865 -0.195310354  0.695841134  0.101323202  0.015474211 
        V481         V482         V483         V484         V485 
 0.124772422 -0.630418956  0.184272379  0.494461060 -0.115648441 
        V486         V487         V488         V489         V490 
-0.408175468  0.448609829 -0.447077632  0.189439148 -0.479406595 
        V491         V492         V493         V494         V495 
-0.766415358 -0.195094183  0.759935200 -0.968636334  0.227311879 
        V496         V497         V498         V499         V500 
-0.720171213 -0.488218427 -0.504908085 -0.016723216  0.343273669 
        V501         V502         V503         V504         V505 
-0.192890137 -1.125163555 -0.774845243 -0.061224695  0.109500155 
        V506         V507         V508         V509         V510 
-0.128214985  0.198328316 -0.181318581  1.270762563  1.246992588 
        V511         V512         V513         V514         V515 
-0.917330801  0.356388777  0.154612839 -0.014691506  0.138227865 
        V516         V517         V518         V519         V520 
 0.712660372 -0.510587931  0.438331902 -0.960962117 -0.829692364 
        V521         V522         V523         V524         V525 
-0.644994497 -0.528421640 -0.256043911 -0.078093186  0.278276622 
        V526         V527         V528         V529         V530 
-0.581769586 -0.214111909 -0.022284009 -0.605088770 -0.404536128 
        V531         V532         V533         V534         V535 
 0.107007384 -0.002547226  0.080371849 -0.386875212 -0.338473320 
        V536         V537         V538         V539         V540 
-0.750163853 -0.167089224 -0.830227137 -0.876406133  0.399444699 
        V541         V542         V543         V544         V545 
 0.493068516 -0.020540230 -0.488464206  0.020406287 -0.656333685 
        V546         V547         V548         V549         V550 
-0.067093402 -0.411014557 -0.425000459  0.081778243  0.050408222 
        V551         V552         V553         V554         V555 
 0.040145412 -0.860253870 -0.080254376  0.497177303  0.550607741 
        V556         V557         V558         V559         V560 
 0.081090525 -0.005928989 -0.204178423  0.226873547  0.272223711 
        V561         V562         V563         V564         V565 
-1.088798523 -0.340527385  0.399161100 -0.081619024  0.373200446 
        V566         V567         V568         V569         V570 
-0.436997771  0.286168307 -0.331678510  0.642864883  0.072091363 
        V571         V572         V573         V574         V575 
-0.107398689  0.037629526  0.087937996  0.843734026  0.425460041 
        V576         V577         V578         V579         V580 
 0.642786145  0.340299457 -0.608012199  0.468952060 -0.686615944 
        V581         V582         V583         V584         V585 
-0.315563381 -0.271953166  0.259512365  0.342220247  0.179718465 
        V586         V587         V588         V589         V590 
-0.153990924  0.269073665 -0.785894573  1.490729690  0.538417578 
        V591         V592         V593         V594         V595 
 0.413520843 -0.138673067  0.690002680  0.364513874  0.171176061 
        V596         V597         V598         V599         V600 
 0.845468938 -0.462004811  0.704325557 -0.852577507  0.062689453 
        V601         V602         V603         V604         V605 
-0.237946346  1.339154959  0.369980365  0.511419296  0.100916341 
        V606         V607         V608         V609         V610 
-0.269511044 -0.774623752 -0.466983467 -0.494631529  0.358213127 
        V611         V612         V613         V614         V615 
 0.488821507  0.026737325  0.008949973  0.717032731  0.115034871 
        V616         V617         V618         V619         V620 
-0.263363361  0.075989030  0.445580363  0.219485685  0.593416631 
        V621         V622         V623         V624         V625 
-0.436996996 -0.078865215 -1.005222201 -0.583927751  0.256022096 
        V626         V627         V628         V629         V630 
-0.332528770 -0.689943612  0.426239192 -0.225680247 -0.787585497 
        V631         V632         V633         V634         V635 
 1.170485377 -0.045915484 -0.336460531  0.957744479 -0.461872697 
        V636         V637         V638         V639         V640 
-0.048079215 -0.340103686  0.618069768  1.172930121  0.253866255 
        V641         V642         V643         V644         V645 
-0.984215319  0.862671912 -0.352393031 -0.111014701  0.597480237 
        V646         V647         V648         V649         V650 
 0.557858348 -0.600097895  0.543230772  0.468553185  0.857713401 
        V651         V652         V653         V654         V655 
-0.188041151  0.269941419 -0.221002445 -0.159422100 -0.161348343 
        V656         V657         V658         V659         V660 
 0.833555698  0.033192411 -0.179883629 -0.003060237  0.569723427 
        V661         V662         V663         V664         V665 
 0.357718796  0.699060321  0.313856721  0.302993596  0.575642765 
        V666         V667         V668         V669         V670 
-0.051228836 -0.453384012 -1.118219852  0.391545355  0.666636288 
        V671         V672         V673         V674         V675 
 1.150002599 -0.664185882  0.553151071 -0.143985957  0.328802764 
        V676         V677         V678         V679         V680 
 0.331800103 -0.238317400  0.346388906  0.377521276  0.777694643 
        V681         V682         V683         V684         V685 
-0.277723044  0.179897517  0.721463025  0.511422038  0.011029847 
        V686         V687         V688         V689         V690 
-0.145932510 -0.377478480 -0.303765774 -0.443033367 -0.039036982 
        V691         V692         V693         V694         V695 
-0.188079491 -1.398189783 -0.637902498  0.334215343  0.307030767 
        V696         V697         V698         V699         V700 
 0.016914867  0.416124642 -0.749973655  1.076300502  0.125640839 
        V701         V702         V703         V704         V705 
 0.332057804 -0.023831360  0.614924729  0.295576215  0.211165845 
        V706         V707         V708         V709         V710 
 0.518762171  0.269092202 -1.126153231  0.569447637 -0.703825355 
        V711         V712         V713         V714         V715 
-0.192900598  0.251399368  0.027419411  0.970783651 -0.597183406 
        V716         V717         V718         V719         V720 
 1.385594249 -0.250855833  0.030310787  0.050497908 -0.118641272 
        V721         V722         V723         V724         V725 
 0.303394258  1.110183835 -0.193579331  0.641880512 -0.128473178 
        V726         V727         V728         V729         V730 
 0.247657984 -0.149536073 -0.336854339 -0.224512428  0.037582412 
        V731         V732         V733         V734         V735 
-0.230640069  1.041052818  0.016714470 -0.245454282  0.601938188 
        V736         V737         V738         V739         V740 
 0.536032319 -0.834958553 -0.807084799 -0.749644458  1.272573113 
        V741         V742         V743         V744         V745 
 0.580855012  0.098985396  0.358252764 -0.030859120  0.403456479 
        V746         V747         V748         V749         V750 
 0.691784263 -0.246995658 -0.137335494 -0.133693695  0.258291751 
        V751         V752         V753         V754         V755 
 0.167389870  0.784490407 -3.851931572 -0.326559991 -0.726175606 
        V756         V757         V758         V759         V760 
-0.120646395  0.095286146  0.043689609  0.237973928 -0.422520310 
        V761         V762         V763         V764         V765 
 0.215968937 -0.359701544 -0.377799779  0.599027932  0.049806491 
        V766         V767         V768 
-0.237085283 -0.384729862  0.739824414 
# Tokenize a sample sentence using the downloaded checkpoint, then print the
# resulting tokens.
tokens <- tokenize_text(text = "Who doesn't like tacos?",
                        ckpt_dir = BERT_PRETRAINED_DIR)
tokens
[[1]]
 [1] "[CLS]" "who"   "doesn" "'"     "t"     "like"  "ta"    "##cos"
 [9] "?"     "[SEP]"
# Check each word against the checkpoint's vocabulary; yields one logical per word.
check_vocab(words = c("positron", "electron"), ckpt_dir = BERT_PRETRAINED_DIR)
[1] FALSE  TRUE
 

데이터 과학자 이광춘 저작

kwangchun.lee.7@gmail.com