logit
> library(ggplot2)
Attaching package: ‘ggplot2’
The following objects are masked from ‘package:psych’:
%+%, alpha
> library(GGally)
> library(caret)
> library(leaps)
>
>
>
> d = read.csv("cereals_old.csv")
> head(d)
Cereals calories protein fat sodium fiber carbo sugars potass
1 100PercentBran 70 4 1 130 10.0 5.0 6 280
2 100PercentNaturalBran 120 3 5 15 2.0 8.0 8 135
3 AllBran 70 4 1 260 9.0 7.0 5 320
4 AllBranwithExtraFiber 50 4 0 140 14.0 8.0 0 330
5 AppleCinnamonCheerios 110 2 2 180 1.5 10.5 10 70
6 AppleJacks 110 2 0 125 1.0 11.0 14 30
vitamins shelf rating
1 25 3 68.40297
2 0 3 33.98368
3 25 3 59.42551
4 25 3 93.70491
5 25 1 29.50954
6 25 2 33.17409
> nrow(d)
[1] 75
> set.seed(20)
> rec=createDataPartition(y=d$rating, p=0.7, list = F)
> dt=d[rec,]
> dv=d[-rec,]
> nrow(dt)
[1] 55
> nrow(dv)
[1] 20
> names(dt)
[1] "Cereals" "calories" "protein" "fat" "sodium" "fiber" "carbo"
[8] "sugars" "potass" "vitamins" "shelf" "rating"
>
>
>
> reg_model = lm(rating~., data=dt)
>
>
>
> summary(reg_model)
Call:
lm(formula = rating ~ ., data = dt)
Residuals:
ALL 55 residuals are 0: no residual degrees of freedom!
>
>
>
> vif(reg_model)
Error in vif.default(reg_model) :
there are aliased coefficients in the model
>
>
>
> reg_model =
lm(rating~calories+protein+fat+sodium+fiber+carbo+sugars+potass+vitamins, data=dt)
>
>
>
> summary(reg_model)
Call:
lm(formula = rating ~ calories + protein + fat + sodium + fiber +
carbo + sugars + potass + vitamins, data = dt)
Residuals:
Min 1Q Median 3Q Max
-0.153533 -0.021059 -0.004076 0.023421 0.123117
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.477e+01 5.243e-02 1044.6 <2e-16 ***
calories -2.308e-01 9.196e-04 -250.9 <2e-16 ***
protein 3.304e+00 9.740e-03 339.2 <2e-16 ***
fat -1.618e+00 1.120e-02 -144.4 <2e-16 ***
sodium -5.449e-02 9.159e-05 -595.0 <2e-16 ***
fiber 3.465e+00 8.855e-03 391.3 <2e-16 ***
carbo 1.134e+00 3.637e-03 311.7 <2e-16 ***
sugars -6.871e-01 3.836e-03 -179.1 <2e-16 ***
potass -3.451e-02 3.146e-04 -109.7 <2e-16 ***
vitamins -5.132e-02 3.475e-04 -147.7 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
>
>
> anova(reg_model)
Analysis of Variance Table
Response: rating
Df Sum Sq Mean Sq F value Pr(>F)
calories 1 5661.3 5661.3 2257215 < 2.2e-16 ***
protein 1 2491.8 2491.8 993511 < 2.2e-16 ***
fat 1 465.9 465.9 185749 < 2.2e-16 ***
sodium 1 385.5 385.5 153691 < 2.2e-16 ***
fiber 1 568.4 568.4 226620 < 2.2e-16 ***
carbo 1 1689.3 1689.3 673523 < 2.2e-16 ***
sugars 1 149.1 149.1 59434 < 2.2e-16 ***
potass 1 31.8 31.8 12690 < 2.2e-16 ***
vitamins 1 54.7 54.7 21818 < 2.2e-16 ***
Residuals 45 0.1 0.0
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
>
>
> vif(reg_model)
calories protein fat sodium fiber carbo sugars potass
6.905801 2.781245 2.788454 1.435114 10.411783 5.585179 6.811717 10.413898
vitamins
1.243795
>
>
>
> vif(reg_model)
calories protein fat sodium fiber carbo sugars potass
6.905801 2.781245 2.788454 1.435114 10.411783 5.585179 6.811717 10.413898
vitamins
1.243795
>
>
>
> dw = read.csv("wine.csv")
>
>
>
> head(dw)
Hedonic Meat Dessert Price Sugar Alcohol Acidity
1 14 7 8 7 7 13 7
2 10 7 6 4 3 14 7
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
>
>
>
> tail(dw)
Hedonic Meat Dessert Price Sugar Alcohol Acidity
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
7 13 7 8 6 7 13 7
8 14 7 7 7 7 13 6
>
>
>
> dw
Hedonic Meat Dessert Price Sugar Alcohol Acidity
1 14 7 8 7 7 13 7
2 10 7 6 4 3 14 7
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
7 13 7 8 6 7 13 7
8 14 7 7 7 7 13 6
>
>
>
> names(x = dw)
[1] "Hedonic" "Meat" "Dessert" "Price" "Sugar" "Alcohol" "Acidity"
>
>
>
> cor(dw[-4])
Hedonic Meat Dessert Sugar Alcohol Acidity
Hedonic 1.0000000 0.8241888 0.5995401 0.3220453 0.7717847 0.8917644
Meat 0.8241888 1.0000000 0.8295614 0.4151733 0.9379429 0.9464033
Dessert 0.5995401 0.8295614 1.0000000 0.6963106 0.6257681 0.6957637
Sugar 0.3220453 0.4151733 0.6963106 1.0000000 0.2079616 0.2351093
Alcohol 0.7717847 0.9379429 0.6257681 0.2079616 1.0000000 0.9429499
Acidity 0.8917644 0.9464033 0.6957637 0.2351093 0.9429499 1.0000000
>
>
>
> ?prcomp
> prcomp
function (x, ...)
UseMethod("prcomp")
<bytecode: 0x000000000b3cb878>
<environment: namespace:stats>
>
>
> pca = prcomp(dw[-4], scale. = T, center = T)
>
>
>
> class(pca)
[1] "prcomp"
>
>
>
> pca
Standard deviations (1, .., p=6):
[1] 2.10785278 1.04405376 0.53783271 0.38494941 0.14726536 0.08815476
Rotation (n x k) = (6 x 6):
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
> pca
Standard deviations (1, .., p=6):
[1] 2.10785278 1.04405376 0.53783271 0.38494941 0.14726536 0.08815476
Rotation (n x k) = (6 x 6):
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
>
>
>
> names(pca)
[1] "sdev" "rotation" "center" "scale" "x"
>
>
>
> pca$rotation
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
>
>
>
> pca$center
Hedonic Meat Dessert Sugar Alcohol Acidity
9.000 5.125 6.000 5.500 12.125 5.125
>
>
>
> pca$scale
Hedonic Meat Dessert Sugar Alcohol Acidity
4.503967 2.232071 1.851640 1.772811 1.356203 1.885092
>
>
>
> summary(pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 2.1079 1.0441 0.53783 0.3849 0.14727 0.08815
Proportion of Variance 0.7405 0.1817 0.04821 0.0247 0.00361 0.00130
Cumulative Proportion 0.7405 0.9222 0.97039 0.9951 0.99870 1.00000
>
>
>
> screeplot(pca)
>
>
>
> screeplot(pca, type = "line")
>
>
>
> eigen = pca$sdev^2
>
>
>
> eigen
[1] 4.443043321 1.090048251 0.289264029 0.148186050 0.021687086 0.007771263
>
>
>
> pca$x
PC1 PC2 PC3 PC4 PC5 PC6
[1,] -2.2166472 -0.41286870 0.29008612 0.20472286 0.122631407 0.03337577
[2,] -1.2022717 1.89725296 -0.76645805 -0.09189011 -0.017461726 0.05544278
[3,] 0.4718895 0.35592856 0.01033836 -0.21016853 -0.008268467 -0.20694285
[4,] 1.3373322 -1.74401299 -0.89610792 0.01790454 -0.047770682 0.01690521
[5,] 2.8826155 0.60464796 0.31339441 0.71372678 -0.045470511 0.01247330
[6,] 2.6105150 0.08908048 0.43444487 -0.61774126 0.116117052 0.07539924
[7,] -2.1239213 -0.45173765 0.11417182 0.15189234 0.176111264 -0.01751526
[8,] -1.7595119 -0.33829063 0.50013039 -0.16844662 -0.295888339 0.03086182
>
>
>
> scatterplot(dw[-4])
Error in scatterplot.default(dw[-4]) :
argument "y" is missing, with no default
> scatterplotMatrix(dw[-4])
Warning messages:
1: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
2: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
>
>
>
> scatterplotMatrix(pca$x)
>
>
>
> scatterplotMatrix(dw[-4])
Warning messages:
1: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
2: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
>
>
>
> scatterplotMatrix(pca$x)
>
>
>
> biplot(pca)
>
>
>