library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.0.0     √ purrr   0.2.5
## √ tibble  1.4.2     √ dplyr   0.7.6
## √ tidyr   0.8.1     √ stringr 1.3.1
## √ readr   1.1.1     √ forcats 0.3.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
  # Download the doksonaliigid table: a `kood` column plus integer count
  # columns A..Z and `kokku`.
  andmed <- read_csv(
    "http://www.tlu.ee/~jaagup/andmed/keel/korpus/doksonaliigid.txt"
  )
## Parsed with column specification:
## cols(
##   kood = col_character(),
##   A = col_integer(),
##   C = col_integer(),
##   D = col_integer(),
##   G = col_integer(),
##   H = col_integer(),
##   I = col_integer(),
##   J = col_integer(),
##   K = col_integer(),
##   N = col_integer(),
##   P = col_integer(),
##   S = col_integer(),
##   U = col_integer(),
##   V = col_integer(),
##   X = col_integer(),
##   Y = col_integer(),
##   Z = col_integer(),
##   kokku = col_integer()
## )
  # Preview the first rows of the loaded table.
  andmed %>% head()
## # A tibble: 6 x 18
##   kood       A     C     D     G     H     I     J     K     N     P     S
##   <chr>  <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 doc_1~    25     0    14     0     3     0    19     5     3    17    54
## 2 doc_1~     4     0     5     0     4     0    12     1     3    14    31
## 3 doc_1~     9     0     6     0     2     0    13     1     3    17    53
## 4 doc_1~    46     7    50     4    20     0    38     3     2    34   183
## 5 doc_1~    43     7    49     4    21     0    37     6     2    39   182
## 6 doc_1~    45     7    51     4    20     0    38     4     2    37   180
## # ... with 6 more variables: U <int>, V <int>, X <int>, Y <int>, Z <int>,
## #   kokku <int>
  # Keep only rows with a positive S count.
  andmed <- filter(andmed, S > 0)
  # Document codes, kept separately from the numeric columns.
  koodid <- andmed[["kood"]]
  # Numeric part of the table: the count columns A through Z (without `kokku`).
  arvud <- select(andmed, A:Z)
  # Principal component analysis on the raw, unstandardised counts.
  k <- prcomp(x = arvud)
  print(k)
## Standard deviations (1, .., p=16):
##  [1] 135.4747633  50.0896571  22.3702160  11.9097138  10.8815591
##  [6]   8.6494909   8.4137031   6.9975662   6.2364811   5.4485878
## [11]   3.1883229   2.4634667   1.4584338   0.8271939   0.3871350
## [16]   0.2004371
## 
## Rotation (n x k) = (16 x 16):
##             PC1           PC2           PC3          PC4           PC5
## A -0.1041849602  0.1621765910 -1.156903e-01  0.132294003  0.3233249741
## C -0.0106252296  0.0166247888 -1.444131e-02 -0.005916120  0.0118451553
## D -0.1449092894  0.2825085935 -5.187632e-02  0.197197039 -0.0733779488
## G -0.0113691477  0.0134113541 -6.908072e-03  0.001351147  0.0574571368
## H -0.1091388370 -0.0277987295  3.326823e-01  0.869635455 -0.0905498961
## I  0.0008873210  0.0020126227  7.473663e-03  0.001365954 -0.0015536325
## J -0.1218683252  0.2076541831 -6.604906e-02  0.114379296  0.0206460695
## K -0.0331953679  0.0478755294 -9.209988e-03  0.102391056  0.0338163890
## N -0.0454310760  0.0609312787  1.570972e-01  0.067698317 -0.0319194493
## P -0.1451407569  0.3390797528 -1.562809e-01  0.041859440 -0.6889111043
## S -0.7802315055 -0.4901443563 -3.532559e-01  0.044324756  0.0690329519
## U -0.0010642765  0.0014556391  8.908265e-05  0.004446359  0.0029165440
## V -0.2956483470  0.5803841120 -2.790217e-01 -0.131135911 -0.0067334942
## X -0.0004360576  0.0006177858  5.148343e-04  0.003256219  0.0005155047
## Y -0.1156084268 -0.3020670719  2.953552e-01 -0.230289324 -0.5899425411
## Z -0.4553824154  0.2453983098  7.276721e-01 -0.291856858  0.2199869965
##             PC6           PC7           PC8           PC9          PC10
## A -0.1604501768 -4.491844e-02 -0.0091751015  0.2138071129 -0.8558831865
## C -0.0527667104  3.046421e-02 -0.0033003697 -0.0022749388 -0.0072645670
## D -0.6781027311 -4.966642e-01  0.0915604550  0.2081793280  0.2965971937
## G  0.0650969268 -1.277361e-02 -0.0011271838 -0.0969387571  0.1129709270
## H  0.2321919911  4.317765e-02  0.2185023173 -0.0669710781  0.0024799350
## I  0.0058506241  8.274368e-05 -0.0091838807  0.0071245133  0.0133877055
## J -0.2882774939  1.414189e-01 -0.3594413920 -0.8259477258 -0.0722146960
## K -0.0713043095  2.953630e-02 -0.0349752502  0.0648032953 -0.0390123057
## N -0.3783035328  7.895976e-01 -0.1972671203  0.3582211608  0.1331059209
## P  0.2806046573 -9.000413e-02 -0.4622626228  0.2037125354 -0.1354251139
## S  0.0283570508  1.551724e-02 -0.0849050437  0.0513475461  0.0901150581
## U -0.0037515123 -5.592466e-03 -0.0017064911 -0.0025075253 -0.0004161576
## V  0.1786262083  2.695872e-01  0.6002299575 -0.0877734183  0.0795958681
## X  0.0008930388 -2.351791e-03  0.0006119455 -0.0009656594 -0.0008713642
## Y -0.3021570809  4.350647e-02  0.4087607913 -0.1716167983 -0.3293465561
## Z  0.1392737183 -1.451449e-01 -0.1615449172  0.0316086189  0.0278048278
##           PC11          PC12          PC13          PC14          PC15
## A  0.145257211  0.0005589865 -0.0117820722  0.0091411699 -7.227232e-04
## C -0.071173204  0.0520173976  0.9914463371  0.0653776338 -2.064410e-02
## D  0.065931693  0.0354895472 -0.0197897126 -0.0044594328 -4.373447e-03
## G  0.726894985 -0.6581170409  0.0886919200  0.0202534753 -1.117926e-03
## H  0.025771211  0.0549791191  0.0211983791 -0.0035277913 -2.119842e-03
## I  0.012752954  0.0374786489 -0.0659598624  0.9966923166  1.563901e-02
## J -0.032195759  0.0476573106 -0.0336138291  0.0017484725 -3.110503e-03
## K -0.651040529 -0.7391833690 -0.0165980899  0.0353427576 -1.380029e-02
## N  0.102506352  0.0236072975 -0.0348100626 -0.0100120244  5.161888e-03
## P  0.045216113 -0.0195756562  0.0192031465 -0.0046407737  1.187877e-03
## S -0.002185184  0.0207224397 -0.0056125193  0.0007276351  4.815301e-05
## U -0.010056322 -0.0099420304  0.0214591716 -0.0136742949  9.994707e-01
## V -0.032795029  0.0185278983 -0.0175889672  0.0038851883  2.771722e-03
## X -0.001834230 -0.0061070551 -0.0019496772 -0.0053762615  1.102347e-02
## Y  0.029662406 -0.0886658617  0.0009608875  0.0120691524  1.839953e-03
## Z -0.027885307  0.0107105989  0.0067378885 -0.0072781017 -1.145542e-03
##            PC16
## A  0.0008756747
## C -0.0028174351
## D  0.0006510823
## G  0.0025183647
## H  0.0028603163
## I -0.0053211142
## J  0.0003709263
## K  0.0056062392
## N -0.0025889824
## P  0.0001113755
## S -0.0002707752
## U  0.0111586062
## V -0.0001874964
## X -0.9998925027
## Y -0.0002668718
## Z  0.0001991046
  # Share of variance carried by each component of the unscaled PCA.
  k %>% summary()
## Importance of components:
##                             PC1     PC2      PC3      PC4      PC5     PC6
## Standard deviation     135.4748 50.0897 22.37022 11.90971 10.88156 8.64949
## Proportion of Variance   0.8378  0.1145  0.02284  0.00648  0.00541 0.00342
## Cumulative Proportion    0.8378  0.9524  0.97523  0.98170  0.98711 0.99052
##                            PC7     PC8     PC9    PC10    PC11    PC12
## Standard deviation     8.41370 6.99757 6.23648 5.44859 3.18832 2.46347
## Proportion of Variance 0.00323 0.00224 0.00178 0.00136 0.00046 0.00028
## Cumulative Proportion  0.99376 0.99599 0.99777 0.99912 0.99959 0.99986
##                          PC13    PC14    PC15   PC16
## Standard deviation     1.4584 0.82719 0.38713 0.2004
## Proportion of Variance 0.0001 0.00003 0.00001 0.0000
## Cumulative Proportion  1.0000 0.99999 1.00000 1.0000
# Refit the PCA on standardised columns so that high-count columns no longer
# dominate the components; `k` is overwritten with the scaled fit.
k <- arvud %>%
  scale() %>%
  prcomp()
k %>% summary()
## Importance of components:
##                           PC1    PC2     PC3    PC4     PC5    PC6    PC7
## Standard deviation     2.9492 1.2990 1.02354 0.9798 0.93456 0.8953 0.7322
## Proportion of Variance 0.5436 0.1055 0.06548 0.0600 0.05459 0.0501 0.0335
## Cumulative Proportion  0.5436 0.6491 0.71457 0.7746 0.82916 0.8792 0.9128
##                            PC8     PC9    PC10    PC11    PC12    PC13
## Standard deviation     0.65535 0.53055 0.45099 0.38324 0.31458 0.30009
## Proportion of Variance 0.02684 0.01759 0.01271 0.00918 0.00619 0.00563
## Cumulative Proportion  0.93960 0.95719 0.96990 0.97908 0.98527 0.99090
##                           PC14    PC15    PC16
## Standard deviation     0.26332 0.21866 0.16876
## Proportion of Variance 0.00433 0.00299 0.00178
## Cumulative Proportion  0.99523 0.99822 1.00000
# Loadings of every original column on each principal component (scaled fit).
k[["rotation"]]
##           PC1          PC2           PC3         PC4          PC5
## A -0.31087597  0.146916284 -0.0498581665  0.03213802 -0.027209266
## C -0.25513382  0.147741004 -0.0946818597  0.24474675  0.137864356
## D -0.31387518  0.172756130  0.0128193387  0.02166181  0.003024961
## G -0.16322131  0.043078005 -0.0723912588 -0.27230951 -0.754756383
## H -0.23866986 -0.374697078  0.1452368397 -0.16229444  0.063419389
## I  0.03592187  0.118333384  0.9075071486 -0.16315266  0.035468594
## J -0.31804499  0.142540532  0.0050282986  0.04460358 -0.054036142
## K -0.29898869  0.083440401 -0.0118813285  0.02414109  0.168727700
## N -0.22533396 -0.066274932  0.2812114470  0.24210037  0.153512978
## P -0.29587660  0.227168047  0.0368694207  0.04765719 -0.057451657
## S -0.27270900 -0.379196298 -0.0606859396  0.03836201 -0.083288670
## U -0.14597692  0.067410960 -0.2061864602 -0.24216300  0.555718314
## V -0.31791547  0.193016972 -0.0008011966  0.05573681 -0.091666256
## X -0.11941209  0.009115484 -0.0683804211 -0.82452190  0.140394874
## Y -0.12228974 -0.683333905  0.0196377282  0.06851352  0.015614352
## Z -0.31626163 -0.185987592  0.0813524974  0.01668849 -0.054164007
##            PC6          PC7         PC8         PC9         PC10
## A  0.023324833 -0.134762302  0.07215282 -0.10066368  0.352147407
## C  0.088532614 -0.030744187 -0.75076182 -0.43319483 -0.222866215
## D  0.017448347 -0.122706043  0.10913699  0.07129171 -0.134883862
## G -0.383705840  0.319246912 -0.16110160 -0.08468172  0.068033359
## H -0.016632631  0.153988472  0.38814904 -0.57759282 -0.450451365
## I -0.207375247 -0.224354858 -0.16598197 -0.04435773  0.090362445
## J  0.009296958 -0.034661905  0.02933134  0.09708100 -0.073291890
## K  0.097311506  0.003782828  0.26801170 -0.32417916  0.558469796
## N  0.187867785  0.790201913 -0.07240836  0.27471610  0.100017125
## P  0.056094484 -0.178214832  0.17920351  0.33015663 -0.421440376
## S -0.055742998 -0.265178827 -0.12088290  0.04747525  0.286249504
## U -0.725700522  0.121610388 -0.05051864  0.13371700 -0.012483654
## V  0.027733444 -0.116793234  0.05572722  0.17558503 -0.061919762
## X  0.470028333  0.039738795 -0.21190625  0.11541275  0.021468711
## Y -0.028264506 -0.171884447 -0.19781634  0.27333402 -0.004619323
## Z -0.023709191 -0.051016303  0.02688863  0.12802142 -0.033798004
##           PC11        PC12         PC13         PC14          PC15
## A -0.643279166  0.10880912 -0.189999039  0.185661566 -0.4714886372
## C  0.085374022  0.03054516 -0.065250397 -0.032277319 -0.0369950787
## D -0.015572003 -0.77171567 -0.172168998  0.353250837  0.2539656572
## G  0.149323498 -0.02653998 -0.062787949  0.053953535 -0.0754102049
## H -0.124223806  0.09909217  0.011773251  0.074415687  0.0228642003
## I  0.020874686  0.01725002  0.011820175  0.031827939 -0.0083425284
## J -0.044018407 -0.05309921  0.914022081 -0.012726856 -0.1245985301
## K  0.598490529 -0.02480311 -0.025046438 -0.082136587 -0.0983042679
## N -0.067896529  0.06237515 -0.051584395  0.127337934  0.0734505498
## P  0.328608425  0.42231060 -0.220176017  0.187671926 -0.2044768335
## S -0.082028099  0.32765910  0.065108236  0.252280155  0.6082258893
## U -0.008236371  0.04145222 -0.014568253  0.001914715  0.0008586619
## V -0.052908707  0.14806498 -0.118395922 -0.239570188  0.2460872774
## X -0.005741255  0.01207584  0.000403574  0.003561098  0.0071747218
## Y  0.202433550 -0.18732634 -0.020244033  0.136682882 -0.4497545969
## Z -0.131018465 -0.17014902 -0.140352854 -0.795083835  0.0181337322
##            PC16
## A -0.0306900849
## C -0.0237415546
## D -0.0757117616
## G -0.0030036778
## H  0.0925473240
## I  0.0168094588
## J -0.0277489367
## K  0.0176674510
## N -0.0193225244
## P -0.2973975590
## S -0.2135507193
## U  0.0121068350
## V  0.8008069644
## X  0.0007134055
## Y  0.2666972834
## Z -0.3685574033
# Factor analysis of the count columns with two factors (default rotation).
f <- factanal(x = arvud, factors = 2)
print(f)
## 
## Call:
## factanal(x = arvud, factors = 2)
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852 
##     V     X     Y     Z 
## 0.036 0.903 0.058 0.072 
## 
## Loadings:
##   Factor1 Factor2
## A  0.902   0.232 
## C  0.711   0.197 
## D  0.924   0.216 
## G  0.446   0.148 
## H  0.422   0.647 
## I         -0.130 
## J  0.918   0.252 
## K  0.818   0.269 
## N  0.534   0.326 
## P  0.919   0.137 
## S  0.510   0.798 
## U  0.364   0.121 
## V  0.962   0.198 
## X  0.291   0.108 
## Y          0.969 
## Z  0.736   0.621 
## 
##                Factor1 Factor2
## SS loadings      7.142   2.883
## Proportion Var   0.446   0.180
## Cumulative Var   0.446   0.627
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# Same analysis with three factors; `f` is overwritten.
f <- factanal(x = arvud, factors = 3)
print(f)
## 
## Call:
## factanal(x = arvud, factors = 3)
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.124 0.462 0.108 0.777 0.289 0.979 0.103 0.201 0.580 0.127 0.115 0.810 
##     V     X     Y     Z 
## 0.006 0.866 0.005 0.071 
## 
## Loadings:
##   Factor1 Factor2 Factor3
## A  0.817   0.167   0.425 
## C  0.658   0.175   0.274 
## D  0.841   0.178   0.390 
## G  0.436   0.113   0.141 
## H  0.266   0.526   0.602 
## I         -0.136         
## J  0.838   0.209   0.388 
## K  0.689   0.180   0.540 
## N  0.440   0.272   0.390 
## P  0.898   0.143   0.216 
## S  0.448   0.749   0.351 
## U  0.270           0.337 
## V  0.945   0.187   0.255 
## X  0.213           0.294 
## Y          0.980   0.156 
## Z  0.653   0.561   0.433 
## 
##                Factor1 Factor2 Factor3
## SS loadings      5.897   2.447   2.032
## Proportion Var   0.369   0.153   0.127
## Cumulative Var   0.369   0.521   0.648
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 13934.94 on 75 degrees of freedom.
## The p-value is 0
# Two-factor fit on standardised columns. The printed uniquenesses and
# loadings match the unscaled two-factor fit, i.e. scaling does not change
# the result here.
f <- factanal(x = scale(arvud), factors = 2)
print(f)
## 
## Call:
## factanal(x = scale(arvud), factors = 2)
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852 
##     V     X     Y     Z 
## 0.036 0.903 0.058 0.072 
## 
## Loadings:
##   Factor1 Factor2
## A  0.902   0.232 
## C  0.711   0.197 
## D  0.924   0.216 
## G  0.446   0.148 
## H  0.422   0.647 
## I         -0.130 
## J  0.918   0.252 
## K  0.818   0.269 
## N  0.534   0.326 
## P  0.919   0.137 
## S  0.510   0.798 
## U  0.364   0.121 
## V  0.962   0.198 
## X  0.291   0.108 
## Y          0.969 
## Z  0.736   0.621 
## 
##                Factor1 Factor2
## SS loadings      7.142   2.883
## Proportion Var   0.446   0.180
## Cumulative Var   0.446   0.627
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# Three-factor fit on standardised columns.
f <- factanal(x = scale(arvud), factors = 3)
print(f)
## 
## Call:
## factanal(x = scale(arvud), factors = 3)
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.124 0.462 0.108 0.777 0.289 0.979 0.103 0.201 0.580 0.127 0.115 0.810 
##     V     X     Y     Z 
## 0.006 0.866 0.005 0.071 
## 
## Loadings:
##   Factor1 Factor2 Factor3
## A  0.817   0.167   0.425 
## C  0.658   0.175   0.274 
## D  0.841   0.178   0.390 
## G  0.436   0.113   0.141 
## H  0.266   0.526   0.602 
## I         -0.136         
## J  0.838   0.209   0.388 
## K  0.689   0.180   0.540 
## N  0.440   0.272   0.390 
## P  0.898   0.143   0.216 
## S  0.448   0.749   0.351 
## U  0.270           0.337 
## V  0.945   0.187   0.255 
## X  0.213           0.294 
## Y          0.980   0.156 
## Z  0.653   0.561   0.433 
## 
##                Factor1 Factor2 Factor3
## SS loadings      5.897   2.447   2.032
## Proportion Var   0.369   0.153   0.127
## Cumulative Var   0.369   0.521   0.648
## 
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 13934.94 on 75 degrees of freedom.
## The p-value is 0
# Four-factor fit on standardised columns.
f <- factanal(x = scale(arvud), factors = 4)
print(f)
## 
## Call:
## factanal(x = scale(arvud), factors = 4)
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.095 0.451 0.101 0.759 0.275 0.929 0.102 0.187 0.485 0.120 0.005 0.804 
##     V     X     Y     Z 
## 0.007 0.871 0.085 0.053 
## 
## Loadings:
##   Factor1 Factor2 Factor3 Factor4
## A  0.789   0.139   0.465   0.219 
## C  0.640   0.124   0.321   0.144 
## D  0.830   0.160   0.429         
## G  0.445   0.133           0.138 
## H  0.291   0.634   0.488         
## I                         -0.257 
## J  0.830   0.195   0.406         
## K  0.662   0.192   0.570   0.112 
## N  0.452   0.338   0.384  -0.222 
## P  0.897   0.104   0.254         
## S  0.474   0.723   0.246   0.432 
## U  0.251           0.350         
## V  0.940   0.143   0.287         
## X  0.204           0.285         
## Y          0.942           0.164 
## Z  0.682   0.589   0.364         
## 
##                Factor1 Factor2 Factor3 Factor4
## SS loadings      5.819   2.473   1.929   0.449
## Proportion Var   0.364   0.155   0.121   0.028
## Cumulative Var   0.364   0.518   0.639   0.667
## 
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 8265.53 on 62 degrees of freedom.
## The p-value is 0
# Two-factor fit with the oblique "promax" rotation; the printout then also
# contains a factor-correlation table.
f <- factanal(x = scale(arvud), factors = 2, rotation = "promax")
print(f)
## 
## Call:
## factanal(x = scale(arvud), factors = 2, rotation = "promax")
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852 
##     V     X     Y     Z 
## 0.036 0.903 0.058 0.072 
## 
## Loadings:
##   Factor1 Factor2
## A  0.960         
## C  0.751         
## D  0.991         
## G  0.462         
## H  0.249   0.603 
## I         -0.137 
## J  0.971         
## K  0.849         
## N  0.497   0.189 
## P  1.014  -0.168 
## S  0.294   0.748 
## U  0.378         
## V  1.042  -0.112 
## X  0.297         
## Y -0.435   1.151 
## Z  0.623   0.461 
## 
##                Factor1 Factor2
## SS loadings      7.664   2.568
## Proportion Var   0.479   0.160
## Cumulative Var   0.479   0.639
## 
## Factor Correlations:
##         Factor1 Factor2
## Factor1   1.000  -0.571
## Factor2  -0.571   1.000
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# List the components stored in the factanal result.
f %>% names()
##  [1] "converged"    "loadings"     "uniquenesses" "correlation" 
##  [5] "criteria"     "factors"      "dof"          "method"      
##  [9] "rotmat"       "STATISTIC"    "PVAL"         "n.obs"       
## [13] "call"
  # Draw an empty frame spanning the two loading columns, then write each
  # variable's name at its (Factor1, Factor2) position.
  plot(f$loadings, type = "n")
  text(f$loadings, labels = rownames(f$loadings))

Peakomponentide analüüsil tekstide parameetrid esimese kahe komponendi järgi

 # Weighted sum of the first text's counts, using the PC1 loadings as weights.
 # NOTE(review): `k` was last fitted on scale(arvud) while the raw counts are
 # used here, so this value is not the same as k$x[1, 1] -- confirm intent.
 sum(arvud[1, ] * k$rotation[, "PC1"])
## [1] -63.36461
 # The same weighted sum for the second component.
 sum(arvud[1, ] * k$rotation[, "PC2"])
## [1] -8.660654
 # First-principal-component values of the first five texts
 vapply(
   1:5,
   function(rida) sum(arvud[rida, ] * k$rotation[, "PC1"]),
   numeric(1)
 )
## [1]  -63.36461  -34.79098  -46.48306 -209.09924 -208.61691
 # Values of the first five texts on the first and second component, computed
 # as weighted sums of the raw counts, then plotted against each other.
 esimesed <- vapply(
   1:5,
   function(rida) sum(arvud[rida, ] * k$rotation[, "PC1"]),
   numeric(1)
 )
 teised <- vapply(
   1:5,
   function(rida) sum(arvud[rida, ] * k$rotation[, "PC2"]),
   numeric(1)
 )
 plot(esimesed, teised)

Harjutus

Koostage peakomponentide analüüs doksonaliigid teksti põhjal (tehtud). Kuvage vene emakeelega inimeste tekstid joonisel kahe peakomponendi järgi. Kuvage teise värviga soome emakeelega inimeste tekstid samade komponentide järgi.

Keeleandmed on failis dokmeta; teksti koodi järgi saab tabelid kokku ühendada.

  # Fresh, unfiltered copy of the count table (same URL as `andmed`), used for
  # the join by `kood` further below.
  doksonaliigid <- read_csv(
    "http://www.tlu.ee/~jaagup/andmed/keel/korpus/doksonaliigid.txt"
  )
## Parsed with column specification:
## cols(
##   kood = col_character(),
##   A = col_integer(),
##   C = col_integer(),
##   D = col_integer(),
##   G = col_integer(),
##   H = col_integer(),
##   I = col_integer(),
##   J = col_integer(),
##   K = col_integer(),
##   N = col_integer(),
##   P = col_integer(),
##   S = col_integer(),
##   U = col_integer(),
##   V = col_integer(),
##   X = col_integer(),
##   Y = col_integer(),
##   Z = col_integer(),
##   kokku = col_integer()
## )
  # Per-document metadata (all columns parsed as character), keyed by `kood`;
  # includes `emakeel` and `tekstikeel`, which are used for filtering later.
  dokmeta <- read_csv(
    "http://www.tlu.ee/~jaagup/andmed/keel/korpus/dokmeta.txt"
  )
## Parsed with column specification:
## cols(
##   kood = col_character(),
##   korpus = col_character(),
##   tekstikeel = col_character(),
##   tekstityyp = col_character(),
##   elukoht = col_character(),
##   taust = col_character(),
##   vanus = col_character(),
##   sugu = col_character(),
##   emakeel = col_character(),
##   kodukeel = col_character(),
##   keeletase = col_character(),
##   haridus = col_character(),
##   abivahendid = col_character()
## )
 # Rebuild the numeric count columns from the filtered table and fit the
 # two-factor promax model whose loadings are reused below.
 arvud <- select(andmed, A:Z)
 f <- factanal(x = arvud, factors = 2, rotation = "promax")
 print(f)
## 
## Call:
## factanal(x = arvud, factors = 2, rotation = "promax")
## 
## Uniquenesses:
##     A     C     D     G     H     I     J     K     N     P     S     U 
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852 
##     V     X     Y     Z 
## 0.036 0.903 0.058 0.072 
## 
## Loadings:
##   Factor1 Factor2
## A  0.960         
## C  0.751         
## D  0.991         
## G  0.462         
## H  0.249   0.603 
## I         -0.137 
## J  0.971         
## K  0.849         
## N  0.497   0.189 
## P  1.014  -0.168 
## S  0.294   0.748 
## U  0.378         
## V  1.042  -0.112 
## X  0.297         
## Y -0.435   1.151 
## Z  0.623   0.461 
## 
##                Factor1 Factor2
## SS loadings      7.664   2.568
## Proportion Var   0.479   0.160
## Cumulative Var   0.479   0.639
## 
## Factor Correlations:
##         Factor1 Factor2
## Factor1   1.000  -0.571
## Factor2  -0.571   1.000
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
 # Full-precision loadings as a plain matrix (the default printout of `f`
 # blanks small values).
 f$loadings[, c("Factor1", "Factor2")]
##         Factor1     Factor2
## A  9.603718e-01 -0.05178683
## C  7.514388e-01 -0.02420903
## D  9.913168e-01 -0.07819499
## G  4.617851e-01  0.01303754
## H  2.487480e-01  0.60320553
## I  8.489644e-05 -0.13658900
## J  9.708608e-01 -0.03421937
## K  8.487871e-01  0.02105739
## N  4.974591e-01  0.18935271
## P  1.014478e+00 -0.16794216
## S  2.938078e-01  0.74792925
## U  3.775358e-01  0.01143401
## V  1.041872e+00 -0.11223585
## X  2.973365e-01  0.02241729
## Y -4.351296e-01  1.15114049
## Z  6.226026e-01  0.46070628
 # Pick the documents to compare: texts written in Estonian
 # (tekstikeel == "eesti") whose author's native language (emakeel) is
 # "vene" or "soome", joined to their count columns by the document code.
 uuritavad <- dokmeta %>%
   filter(emakeel %in% c("vene", "soome"), tekstikeel == "eesti") %>%
   inner_join(doksonaliigid, by = "kood")
 uarvud <- uuritavad %>% select(A:Z)
 # Per-document weighted sums of the counts, using each factor's loadings as
 # weights. One matrix product per factor replaces the row-by-row
 # sapply(1:nrow(...)) loop: it avoids repeated tibble row indexing and stays
 # correct when no rows match (1:0 would iterate over c(1, 0)).
 # NOTE(review): the loadings come from a model fitted on `arvud`, while raw
 # counts are weighted here, so these are not standard factor scores --
 # confirm this is the intended quantity.
 muudliigid <- as.vector(as.matrix(uarvud) %*% f$loadings[, 1])
 nimetajad <- as.vector(as.matrix(uarvud) %*% f$loadings[, 2])
 andmestik <- tibble(emakeel = uuritavad$emakeel, muudliigid, nimetajad)
 # Scatter plot of the two weighted sums, coloured by native language.
 andmestik %>%
   ggplot(aes(nimetajad, muudliigid, color = emakeel)) +
   geom_point()