library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.0.0 √ purrr 0.2.5
## √ tibble 1.4.2 √ dplyr 0.7.6
## √ tidyr 0.8.1 √ stringr 1.3.1
## √ readr 1.1.1 √ forcats 0.3.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Download the per-document table: a text code (`kood`), integer count
# columns A..Z and a total column (`kokku`).
andmed <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/doksonaliigid.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## A = col_integer(),
## C = col_integer(),
## D = col_integer(),
## G = col_integer(),
## H = col_integer(),
## I = col_integer(),
## J = col_integer(),
## K = col_integer(),
## N = col_integer(),
## P = col_integer(),
## S = col_integer(),
## U = col_integer(),
## V = col_integer(),
## X = col_integer(),
## Y = col_integer(),
## Z = col_integer(),
## kokku = col_integer()
## )
# Preview the first six rows of the downloaded table.
head(andmed, n = 6L)
## # A tibble: 6 x 18
## kood A C D G H I J K N P S
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 doc_1~ 25 0 14 0 3 0 19 5 3 17 54
## 2 doc_1~ 4 0 5 0 4 0 12 1 3 14 31
## 3 doc_1~ 9 0 6 0 2 0 13 1 3 17 53
## 4 doc_1~ 46 7 50 4 20 0 38 3 2 34 183
## 5 doc_1~ 43 7 49 4 21 0 37 6 2 39 182
## 6 doc_1~ 45 7 51 4 20 0 38 4 2 37 180
## # ... with 6 more variables: U <int>, V <int>, X <int>, Y <int>, Z <int>,
## # kokku <int>
# Keep only the rows whose S count is positive, then separate the text
# codes from the numeric count columns A..Z.
andmed <- filter(andmed, S > 0)
koodid <- andmed[["kood"]]
arvud <- select(andmed, A:Z)
# Principal component analysis on the raw counts (prcomp() centers the
# columns but does not rescale them unless asked to).
k <- prcomp(arvud)
k
## Standard deviations (1, .., p=16):
## [1] 135.4747633 50.0896571 22.3702160 11.9097138 10.8815591
## [6] 8.6494909 8.4137031 6.9975662 6.2364811 5.4485878
## [11] 3.1883229 2.4634667 1.4584338 0.8271939 0.3871350
## [16] 0.2004371
##
## Rotation (n x k) = (16 x 16):
## PC1 PC2 PC3 PC4 PC5
## A -0.1041849602 0.1621765910 -1.156903e-01 0.132294003 0.3233249741
## C -0.0106252296 0.0166247888 -1.444131e-02 -0.005916120 0.0118451553
## D -0.1449092894 0.2825085935 -5.187632e-02 0.197197039 -0.0733779488
## G -0.0113691477 0.0134113541 -6.908072e-03 0.001351147 0.0574571368
## H -0.1091388370 -0.0277987295 3.326823e-01 0.869635455 -0.0905498961
## I 0.0008873210 0.0020126227 7.473663e-03 0.001365954 -0.0015536325
## J -0.1218683252 0.2076541831 -6.604906e-02 0.114379296 0.0206460695
## K -0.0331953679 0.0478755294 -9.209988e-03 0.102391056 0.0338163890
## N -0.0454310760 0.0609312787 1.570972e-01 0.067698317 -0.0319194493
## P -0.1451407569 0.3390797528 -1.562809e-01 0.041859440 -0.6889111043
## S -0.7802315055 -0.4901443563 -3.532559e-01 0.044324756 0.0690329519
## U -0.0010642765 0.0014556391 8.908265e-05 0.004446359 0.0029165440
## V -0.2956483470 0.5803841120 -2.790217e-01 -0.131135911 -0.0067334942
## X -0.0004360576 0.0006177858 5.148343e-04 0.003256219 0.0005155047
## Y -0.1156084268 -0.3020670719 2.953552e-01 -0.230289324 -0.5899425411
## Z -0.4553824154 0.2453983098 7.276721e-01 -0.291856858 0.2199869965
## PC6 PC7 PC8 PC9 PC10
## A -0.1604501768 -4.491844e-02 -0.0091751015 0.2138071129 -0.8558831865
## C -0.0527667104 3.046421e-02 -0.0033003697 -0.0022749388 -0.0072645670
## D -0.6781027311 -4.966642e-01 0.0915604550 0.2081793280 0.2965971937
## G 0.0650969268 -1.277361e-02 -0.0011271838 -0.0969387571 0.1129709270
## H 0.2321919911 4.317765e-02 0.2185023173 -0.0669710781 0.0024799350
## I 0.0058506241 8.274368e-05 -0.0091838807 0.0071245133 0.0133877055
## J -0.2882774939 1.414189e-01 -0.3594413920 -0.8259477258 -0.0722146960
## K -0.0713043095 2.953630e-02 -0.0349752502 0.0648032953 -0.0390123057
## N -0.3783035328 7.895976e-01 -0.1972671203 0.3582211608 0.1331059209
## P 0.2806046573 -9.000413e-02 -0.4622626228 0.2037125354 -0.1354251139
## S 0.0283570508 1.551724e-02 -0.0849050437 0.0513475461 0.0901150581
## U -0.0037515123 -5.592466e-03 -0.0017064911 -0.0025075253 -0.0004161576
## V 0.1786262083 2.695872e-01 0.6002299575 -0.0877734183 0.0795958681
## X 0.0008930388 -2.351791e-03 0.0006119455 -0.0009656594 -0.0008713642
## Y -0.3021570809 4.350647e-02 0.4087607913 -0.1716167983 -0.3293465561
## Z 0.1392737183 -1.451449e-01 -0.1615449172 0.0316086189 0.0278048278
## PC11 PC12 PC13 PC14 PC15
## A 0.145257211 0.0005589865 -0.0117820722 0.0091411699 -7.227232e-04
## C -0.071173204 0.0520173976 0.9914463371 0.0653776338 -2.064410e-02
## D 0.065931693 0.0354895472 -0.0197897126 -0.0044594328 -4.373447e-03
## G 0.726894985 -0.6581170409 0.0886919200 0.0202534753 -1.117926e-03
## H 0.025771211 0.0549791191 0.0211983791 -0.0035277913 -2.119842e-03
## I 0.012752954 0.0374786489 -0.0659598624 0.9966923166 1.563901e-02
## J -0.032195759 0.0476573106 -0.0336138291 0.0017484725 -3.110503e-03
## K -0.651040529 -0.7391833690 -0.0165980899 0.0353427576 -1.380029e-02
## N 0.102506352 0.0236072975 -0.0348100626 -0.0100120244 5.161888e-03
## P 0.045216113 -0.0195756562 0.0192031465 -0.0046407737 1.187877e-03
## S -0.002185184 0.0207224397 -0.0056125193 0.0007276351 4.815301e-05
## U -0.010056322 -0.0099420304 0.0214591716 -0.0136742949 9.994707e-01
## V -0.032795029 0.0185278983 -0.0175889672 0.0038851883 2.771722e-03
## X -0.001834230 -0.0061070551 -0.0019496772 -0.0053762615 1.102347e-02
## Y 0.029662406 -0.0886658617 0.0009608875 0.0120691524 1.839953e-03
## Z -0.027885307 0.0107105989 0.0067378885 -0.0072781017 -1.145542e-03
## PC16
## A 0.0008756747
## C -0.0028174351
## D 0.0006510823
## G 0.0025183647
## H 0.0028603163
## I -0.0053211142
## J 0.0003709263
## K 0.0056062392
## N -0.0025889824
## P 0.0001113755
## S -0.0002707752
## U 0.0111586062
## V -0.0001874964
## X -0.9998925027
## Y -0.0002668718
## Z 0.0001991046
# Standard deviation and share of variance of every component of the
# PCA currently stored in `k`.
k %>% summary()
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 135.4748 50.0897 22.37022 11.90971 10.88156 8.64949
## Proportion of Variance 0.8378 0.1145 0.02284 0.00648 0.00541 0.00342
## Cumulative Proportion 0.8378 0.9524 0.97523 0.98170 0.98711 0.99052
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 8.41370 6.99757 6.23648 5.44859 3.18832 2.46347
## Proportion of Variance 0.00323 0.00224 0.00178 0.00136 0.00046 0.00028
## Cumulative Proportion 0.99376 0.99599 0.99777 0.99912 0.99959 0.99986
## PC13 PC14 PC15 PC16
## Standard deviation 1.4584 0.82719 0.38713 0.2004
## Proportion of Variance 0.0001 0.00003 0.00001 0.0000
## Cumulative Proportion 1.0000 0.99999 1.00000 1.0000
# Refit the PCA on standardized columns (scale() centers each column and
# divides it by its standard deviation), then show the variance breakdown.
k <- arvud %>%
  scale() %>%
  prcomp()
k %>% summary()
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.9492 1.2990 1.02354 0.9798 0.93456 0.8953 0.7322
## Proportion of Variance 0.5436 0.1055 0.06548 0.0600 0.05459 0.0501 0.0335
## Cumulative Proportion 0.5436 0.6491 0.71457 0.7746 0.82916 0.8792 0.9128
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.65535 0.53055 0.45099 0.38324 0.31458 0.30009
## Proportion of Variance 0.02684 0.01759 0.01271 0.00918 0.00619 0.00563
## Cumulative Proportion 0.93960 0.95719 0.96990 0.97908 0.98527 0.99090
## PC14 PC15 PC16
## Standard deviation 0.26332 0.21866 0.16876
## Proportion of Variance 0.00433 0.00299 0.00178
## Cumulative Proportion 0.99523 0.99822 1.00000
# Loadings matrix of the PCA in `k`: one row per count column, one
# column per principal component.
k[["rotation"]]
## PC1 PC2 PC3 PC4 PC5
## A -0.31087597 0.146916284 -0.0498581665 0.03213802 -0.027209266
## C -0.25513382 0.147741004 -0.0946818597 0.24474675 0.137864356
## D -0.31387518 0.172756130 0.0128193387 0.02166181 0.003024961
## G -0.16322131 0.043078005 -0.0723912588 -0.27230951 -0.754756383
## H -0.23866986 -0.374697078 0.1452368397 -0.16229444 0.063419389
## I 0.03592187 0.118333384 0.9075071486 -0.16315266 0.035468594
## J -0.31804499 0.142540532 0.0050282986 0.04460358 -0.054036142
## K -0.29898869 0.083440401 -0.0118813285 0.02414109 0.168727700
## N -0.22533396 -0.066274932 0.2812114470 0.24210037 0.153512978
## P -0.29587660 0.227168047 0.0368694207 0.04765719 -0.057451657
## S -0.27270900 -0.379196298 -0.0606859396 0.03836201 -0.083288670
## U -0.14597692 0.067410960 -0.2061864602 -0.24216300 0.555718314
## V -0.31791547 0.193016972 -0.0008011966 0.05573681 -0.091666256
## X -0.11941209 0.009115484 -0.0683804211 -0.82452190 0.140394874
## Y -0.12228974 -0.683333905 0.0196377282 0.06851352 0.015614352
## Z -0.31626163 -0.185987592 0.0813524974 0.01668849 -0.054164007
## PC6 PC7 PC8 PC9 PC10
## A 0.023324833 -0.134762302 0.07215282 -0.10066368 0.352147407
## C 0.088532614 -0.030744187 -0.75076182 -0.43319483 -0.222866215
## D 0.017448347 -0.122706043 0.10913699 0.07129171 -0.134883862
## G -0.383705840 0.319246912 -0.16110160 -0.08468172 0.068033359
## H -0.016632631 0.153988472 0.38814904 -0.57759282 -0.450451365
## I -0.207375247 -0.224354858 -0.16598197 -0.04435773 0.090362445
## J 0.009296958 -0.034661905 0.02933134 0.09708100 -0.073291890
## K 0.097311506 0.003782828 0.26801170 -0.32417916 0.558469796
## N 0.187867785 0.790201913 -0.07240836 0.27471610 0.100017125
## P 0.056094484 -0.178214832 0.17920351 0.33015663 -0.421440376
## S -0.055742998 -0.265178827 -0.12088290 0.04747525 0.286249504
## U -0.725700522 0.121610388 -0.05051864 0.13371700 -0.012483654
## V 0.027733444 -0.116793234 0.05572722 0.17558503 -0.061919762
## X 0.470028333 0.039738795 -0.21190625 0.11541275 0.021468711
## Y -0.028264506 -0.171884447 -0.19781634 0.27333402 -0.004619323
## Z -0.023709191 -0.051016303 0.02688863 0.12802142 -0.033798004
## PC11 PC12 PC13 PC14 PC15
## A -0.643279166 0.10880912 -0.189999039 0.185661566 -0.4714886372
## C 0.085374022 0.03054516 -0.065250397 -0.032277319 -0.0369950787
## D -0.015572003 -0.77171567 -0.172168998 0.353250837 0.2539656572
## G 0.149323498 -0.02653998 -0.062787949 0.053953535 -0.0754102049
## H -0.124223806 0.09909217 0.011773251 0.074415687 0.0228642003
## I 0.020874686 0.01725002 0.011820175 0.031827939 -0.0083425284
## J -0.044018407 -0.05309921 0.914022081 -0.012726856 -0.1245985301
## K 0.598490529 -0.02480311 -0.025046438 -0.082136587 -0.0983042679
## N -0.067896529 0.06237515 -0.051584395 0.127337934 0.0734505498
## P 0.328608425 0.42231060 -0.220176017 0.187671926 -0.2044768335
## S -0.082028099 0.32765910 0.065108236 0.252280155 0.6082258893
## U -0.008236371 0.04145222 -0.014568253 0.001914715 0.0008586619
## V -0.052908707 0.14806498 -0.118395922 -0.239570188 0.2460872774
## X -0.005741255 0.01207584 0.000403574 0.003561098 0.0071747218
## Y 0.202433550 -0.18732634 -0.020244033 0.136682882 -0.4497545969
## Z -0.131018465 -0.17014902 -0.140352854 -0.795083835 0.0181337322
## PC16
## A -0.0306900849
## C -0.0237415546
## D -0.0757117616
## G -0.0030036778
## H 0.0925473240
## I 0.0168094588
## J -0.0277489367
## K 0.0176674510
## N -0.0193225244
## P -0.2973975590
## S -0.2135507193
## U 0.0121068350
## V 0.8008069644
## X 0.0007134055
## Y 0.2666972834
## Z -0.3685574033
# Two-factor maximum-likelihood factor analysis of the count columns
# (factanal() applies a varimax rotation by default).
f <- factanal(x = arvud, factors = 2)
f
##
## Call:
## factanal(x = arvud, factors = 2)
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852
## V X Y Z
## 0.036 0.903 0.058 0.072
##
## Loadings:
## Factor1 Factor2
## A 0.902 0.232
## C 0.711 0.197
## D 0.924 0.216
## G 0.446 0.148
## H 0.422 0.647
## I -0.130
## J 0.918 0.252
## K 0.818 0.269
## N 0.534 0.326
## P 0.919 0.137
## S 0.510 0.798
## U 0.364 0.121
## V 0.962 0.198
## X 0.291 0.108
## Y 0.969
## Z 0.736 0.621
##
## Factor1 Factor2
## SS loadings 7.142 2.883
## Proportion Var 0.446 0.180
## Cumulative Var 0.446 0.627
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# Same analysis with three factors; `f` is overwritten.
f <- factanal(x = arvud, factors = 3)
f
##
## Call:
## factanal(x = arvud, factors = 3)
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.124 0.462 0.108 0.777 0.289 0.979 0.103 0.201 0.580 0.127 0.115 0.810
## V X Y Z
## 0.006 0.866 0.005 0.071
##
## Loadings:
## Factor1 Factor2 Factor3
## A 0.817 0.167 0.425
## C 0.658 0.175 0.274
## D 0.841 0.178 0.390
## G 0.436 0.113 0.141
## H 0.266 0.526 0.602
## I -0.136
## J 0.838 0.209 0.388
## K 0.689 0.180 0.540
## N 0.440 0.272 0.390
## P 0.898 0.143 0.216
## S 0.448 0.749 0.351
## U 0.270 0.337
## V 0.945 0.187 0.255
## X 0.213 0.294
## Y 0.980 0.156
## Z 0.653 0.561 0.433
##
## Factor1 Factor2 Factor3
## SS loadings 5.897 2.447 2.032
## Proportion Var 0.369 0.153 0.127
## Cumulative Var 0.369 0.521 0.648
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 13934.94 on 75 degrees of freedom.
## The p-value is 0
# Two-factor model on standardized columns, for comparison with the fit
# on the raw counts.
f <- factanal(x = scale(arvud), factors = 2)
f
##
## Call:
## factanal(x = scale(arvud), factors = 2)
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852
## V X Y Z
## 0.036 0.903 0.058 0.072
##
## Loadings:
## Factor1 Factor2
## A 0.902 0.232
## C 0.711 0.197
## D 0.924 0.216
## G 0.446 0.148
## H 0.422 0.647
## I -0.130
## J 0.918 0.252
## K 0.818 0.269
## N 0.534 0.326
## P 0.919 0.137
## S 0.510 0.798
## U 0.364 0.121
## V 0.962 0.198
## X 0.291 0.108
## Y 0.969
## Z 0.736 0.621
##
## Factor1 Factor2
## SS loadings 7.142 2.883
## Proportion Var 0.446 0.180
## Cumulative Var 0.446 0.627
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# Three-factor model on standardized columns.
f <- factanal(x = scale(arvud), factors = 3)
f
##
## Call:
## factanal(x = scale(arvud), factors = 3)
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.124 0.462 0.108 0.777 0.289 0.979 0.103 0.201 0.580 0.127 0.115 0.810
## V X Y Z
## 0.006 0.866 0.005 0.071
##
## Loadings:
## Factor1 Factor2 Factor3
## A 0.817 0.167 0.425
## C 0.658 0.175 0.274
## D 0.841 0.178 0.390
## G 0.436 0.113 0.141
## H 0.266 0.526 0.602
## I -0.136
## J 0.838 0.209 0.388
## K 0.689 0.180 0.540
## N 0.440 0.272 0.390
## P 0.898 0.143 0.216
## S 0.448 0.749 0.351
## U 0.270 0.337
## V 0.945 0.187 0.255
## X 0.213 0.294
## Y 0.980 0.156
## Z 0.653 0.561 0.433
##
## Factor1 Factor2 Factor3
## SS loadings 5.897 2.447 2.032
## Proportion Var 0.369 0.153 0.127
## Cumulative Var 0.369 0.521 0.648
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 13934.94 on 75 degrees of freedom.
## The p-value is 0
# Four-factor model on standardized columns.
f <- factanal(x = scale(arvud), factors = 4)
f
##
## Call:
## factanal(x = scale(arvud), factors = 4)
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.095 0.451 0.101 0.759 0.275 0.929 0.102 0.187 0.485 0.120 0.005 0.804
## V X Y Z
## 0.007 0.871 0.085 0.053
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## A 0.789 0.139 0.465 0.219
## C 0.640 0.124 0.321 0.144
## D 0.830 0.160 0.429
## G 0.445 0.133 0.138
## H 0.291 0.634 0.488
## I -0.257
## J 0.830 0.195 0.406
## K 0.662 0.192 0.570 0.112
## N 0.452 0.338 0.384 -0.222
## P 0.897 0.104 0.254
## S 0.474 0.723 0.246 0.432
## U 0.251 0.350
## V 0.940 0.143 0.287
## X 0.204 0.285
## Y 0.942 0.164
## Z 0.682 0.589 0.364
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 5.819 2.473 1.929 0.449
## Proportion Var 0.364 0.155 0.121 0.028
## Cumulative Var 0.364 0.518 0.639 0.667
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 8265.53 on 62 degrees of freedom.
## The p-value is 0
# Two-factor model with the promax rotation instead of the default; the
# printed result additionally contains a factor correlation table.
f <- factanal(x = scale(arvud), factors = 2, rotation = "promax")
f
##
## Call:
## factanal(x = scale(arvud), factors = 2, rotation = "promax")
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852
## V X Y Z
## 0.036 0.903 0.058 0.072
##
## Loadings:
## Factor1 Factor2
## A 0.960
## C 0.751
## D 0.991
## G 0.462
## H 0.249 0.603
## I -0.137
## J 0.971
## K 0.849
## N 0.497 0.189
## P 1.014 -0.168
## S 0.294 0.748
## U 0.378
## V 1.042 -0.112
## X 0.297
## Y -0.435 1.151
## Z 0.623 0.461
##
## Factor1 Factor2
## SS loadings 7.664 2.568
## Proportion Var 0.479 0.160
## Cumulative Var 0.479 0.639
##
## Factor Correlations:
## Factor1 Factor2
## Factor1 1.000 -0.571
## Factor2 -0.571 1.000
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# List the components stored in the fitted factanal object.
f %>% names()
## [1] "converged" "loadings" "uniquenesses" "correlation"
## [5] "criteria" "factors" "dof" "method"
## [9] "rotmat" "STATISTIC" "PVAL" "n.obs"
## [13] "call"
# Draw an empty coordinate system spanned by the first two loading
# columns, then write each variable's name at its loading position.
plot(f$loadings, type = "n")
text(f$loadings, labels = rownames(f$loadings))
Peakomponentide analüüsil tekstide parameetrid esimese kahe komponendi järgi
# Weighted sum of the first text's counts, using the loadings of the first
# and then the second principal component as weights.
# NOTE(review): `k` was last fitted on scale(arvud) while `arvud` holds the
# raw counts, so these sums differ from the scores stored in k$x - confirm
# that projecting unscaled counts is intended.
sum(arvud[1, ] * k$rotation[, 1])
## [1] -63.36461
sum(arvud[1, ] * k$rotation[, 2])
## [1] -8.660654
# First-component values of the first five texts. They are computed once
# and reused for the plot; vapply() guarantees exactly one numeric value
# per text, whereas sapply() would silently change its return type.
esimesed <- vapply(
  1:5,
  function(nr) sum(arvud[nr, ] * k$rotation[, 1]),
  numeric(1)
)
esimesed
## [1] -63.36461 -34.79098 -46.48306 -209.09924 -208.61691
# Second-component values of the same five texts.
teised <- vapply(
  1:5,
  function(nr) sum(arvud[nr, ] * k$rotation[, 2]),
  numeric(1)
)
# Scatter plot of the five texts: first component on x, second on y.
plot(esimesed, teised)
Harjutus
Koostage peakomponentide analüüs doksonaliigid teksti põhjal (tehtud). Kuvage vene emakeelega inimeste tekstid joonisel kahe peakomponendi järgi. Kuvage teise värviga soome emakeelega inimeste tekstid samade komponentide järgi.
Keeleandmed on failis dokmeta; tabelid saab teksti koodi järgi kokku ühendada.
# Download the per-document count table (text code `kood`, integer columns
# A..Z and the total `kokku`).
doksonaliigid <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/doksonaliigid.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## A = col_integer(),
## C = col_integer(),
## D = col_integer(),
## G = col_integer(),
## H = col_integer(),
## I = col_integer(),
## J = col_integer(),
## K = col_integer(),
## N = col_integer(),
## P = col_integer(),
## S = col_integer(),
## U = col_integer(),
## V = col_integer(),
## X = col_integer(),
## Y = col_integer(),
## Z = col_integer(),
## kokku = col_integer()
## )
# Download the per-document metadata (all character columns, including
# `kood`, `tekstikeel` and `emakeel`).
dokmeta <- read_csv(
  file = "http://www.tlu.ee/~jaagup/andmed/keel/korpus/dokmeta.txt"
)
## Parsed with column specification:
## cols(
## kood = col_character(),
## korpus = col_character(),
## tekstikeel = col_character(),
## tekstityyp = col_character(),
## elukoht = col_character(),
## taust = col_character(),
## vanus = col_character(),
## sugu = col_character(),
## emakeel = col_character(),
## kodukeel = col_character(),
## keeletase = col_character(),
## haridus = col_character(),
## abivahendid = col_character()
## )
# Take the count columns A..Z again and fit the two-factor model with the
# promax rotation on them.
arvud <- select(andmed, A:Z)
f <- factanal(x = arvud, factors = 2, rotation = "promax")
f
##
## Call:
## factanal(x = arvud, factors = 2, rotation = "promax")
##
## Uniquenesses:
## A C D G H I J K N P S U
## 0.132 0.456 0.100 0.780 0.403 0.981 0.094 0.259 0.609 0.137 0.103 0.852
## V X Y Z
## 0.036 0.903 0.058 0.072
##
## Loadings:
## Factor1 Factor2
## A 0.960
## C 0.751
## D 0.991
## G 0.462
## H 0.249 0.603
## I -0.137
## J 0.971
## K 0.849
## N 0.497 0.189
## P 1.014 -0.168
## S 0.294 0.748
## U 0.378
## V 1.042 -0.112
## X 0.297
## Y -0.435 1.151
## Z 0.623 0.461
##
## Factor1 Factor2
## SS loadings 7.664 2.568
## Proportion Var 0.479 0.160
## Cumulative Var 0.479 0.639
##
## Factor Correlations:
## Factor1 Factor2
## Factor1 1.000 -0.571
## Factor2 -0.571 1.000
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 19648.59 on 89 degrees of freedom.
## The p-value is 0
# Loadings of both factors as a plain numeric matrix; indexing drops the
# "loadings" print method, so small values are no longer blanked out.
f[["loadings"]][, 1:2]
## Factor1 Factor2
## A 9.603718e-01 -0.05178683
## C 7.514388e-01 -0.02420903
## D 9.913168e-01 -0.07819499
## G 4.617851e-01 0.01303754
## H 2.487480e-01 0.60320553
## I 8.489644e-05 -0.13658900
## J 9.708608e-01 -0.03421937
## K 8.487871e-01 0.02105739
## N 4.974591e-01 0.18935271
## P 1.014478e+00 -0.16794216
## S 2.938078e-01 0.74792925
## U 3.775358e-01 0.01143401
## V 1.041872e+00 -0.11223585
## X 2.973365e-01 0.02241729
## Y -4.351296e-01 1.15114049
## Z 6.226026e-01 0.46070628
# Select the texts whose `tekstikeel` is "eesti" and whose author's
# `emakeel` is "vene" or "soome", and attach their counts through the
# shared `kood` column.
uuritavad <- dokmeta %>%
  filter(emakeel %in% c("vene", "soome"), tekstikeel == "eesti") %>%
  inner_join(doksonaliigid, by = "kood")
uarvud <- uuritavad %>% select(A:Z)

# One weighted sum of counts per text and per factor, with the factor
# loadings as weights. A matrix product handles all rows at once and yields
# empty vectors for an empty selection, whereas sapply(1:nrow(...)) would
# iterate over c(1, 0) when there are no rows.
# NOTE(review): loadings applied to raw counts are not the factor scores
# factanal() can return via its `scores` argument - confirm this is the
# intended quantity.
muudliigid <- as.vector(as.matrix(uarvud) %*% f$loadings[, 1])
nimetajad <- as.vector(as.matrix(uarvud) %*% f$loadings[, 2])

# Scatter plot of the texts in the plane of the two factors, coloured by
# the author's native language.
andmestik <- tibble(emakeel = uuritavad$emakeel, muudliigid, nimetajad)
andmestik %>%
  ggplot(aes(nimetajad, muudliigid, color = emakeel)) +
  geom_point()