Anda di halaman 1 dari 8

>

> setwd("C:/Users/deepak/Desktop/Term 5/Business Intelligence and Analytics")


> library(psych)
> library(lattice)
> library(car)
Loading required package: carData

Attaching package: ‘car’

The following object is masked from ‘package:psych’:

logit

> library(ggplot2)

Attaching package: ‘ggplot2’

The following objects are masked from ‘package:psych’:

%+%, alpha

> library(GGally)
> library(caret)
> library(leaps)
>
>
>
> d = read.csv("cereals_old.csv")
> head(d)
Cereals calories protein fat sodium fiber carbo sugars potass
1 100PercentBran 70 4 1 130 10.0 5.0 6 280
2 100PercentNaturalBran 120 3 5 15 2.0 8.0 8 135
3 AllBran 70 4 1 260 9.0 7.0 5 320
4 AllBranwithExtraFiber 50 4 0 140 14.0 8.0 0 330
5 AppleCinnamonCheerios 110 2 2 180 1.5 10.5 10 70
6 AppleJacks 110 2 0 125 1.0 11.0 14 30
vitamins shelf rating
1 25 3 68.40297
2 0 3 33.98368
3 25 3 59.42551
4 25 3 93.70491
5 25 1 29.50954
6 25 2 33.17409
> nrow(d)
[1] 75
> set.seed(20)
> rec=createDataPartition(y=d$rating, p=0.7, list = F)
> dt=d[rec,]
> dv=d[-rec,]
> nrow(dt)
[1] 55
> nrow(dv)
[1] 20
> names(dt)
[1] "Cereals" "calories" "protein" "fat" "sodium" "fiber" "carbo"
[8] "sugars" "potass" "vitamins" "shelf" "rating"
>
>
>
> reg_model = lm(rating~., data=dt)
>
>
>
> summary(reg_model)

Call:
lm(formula = rating ~ ., data = dt)

Residuals:
ALL 55 residuals are 0: no residual degrees of freedom!

Coefficients: (10 not defined because of singularities)


Estimate Std. Error t value Pr(>|t|)
(Intercept) 68.4030 NA NA NA
Cereals100PercentNaturalBran -34.4193 NA NA NA
CerealsAllBranwithExtraFiber 25.3019 NA NA NA
CerealsAppleCinnamonCheerios -38.8934 NA NA NA
CerealsAppleJacks -35.2289 NA NA NA
CerealsBasic4 -31.3644 NA NA NA
CerealsCapnCrunch -50.3601 NA NA NA
CerealsCheerios -17.6380 NA NA NA
CerealsClusters -28.0028 NA NA NA
CerealsCocoaPuffs -45.6665 NA NA NA
CerealsCornChex -26.9580 NA NA NA
CerealsCornFlakes -22.5396 NA NA NA
CerealsCornPops -32.6202 NA NA NA
CerealsCountChocula -46.0065 NA NA NA
CerealsCracklinOatBran -27.9542 NA NA NA
CerealsCrispix -21.5073 NA NA NA
CerealsCrispyWheat&Raisins -32.2268 NA NA NA
CerealsDoubleChex -24.0721 NA NA NA
CerealsFrostedFlakes -36.9670 NA NA NA
CerealsFrostedMiniWheats -10.0578 NA NA NA
CerealsFruitfulBran -27.3875 NA NA NA
CerealsGoldenCrisp -33.1505 NA NA NA
CerealsGoldenGrahams -44.5989 NA NA NA
CerealsGrapeNuts -15.0320 NA NA NA
CerealsHoneycomb -39.6606 NA NA NA
CerealsHoneyNutCheerios -37.3308 NA NA NA
CerealsJustRightFruit&Nut -31.9315 NA NA NA
CerealsKix -29.1619 NA NA NA
CerealsLife -23.0749 NA NA NA
CerealsMaypo -13.5521 NA NA NA
CerealsMuesliRaisins,Peaches,&Pecans -34.2632 NA NA NA
CerealsMueslixCrispyBlend -38.0896 NA NA NA
CerealsMultiGrainCheerios -28.2970 NA NA NA
CerealsNut&HoneyCrunch -38.4787 NA NA NA
CerealsNutriGrainAlmondRaisin -27.7107 NA NA NA
CerealsOatmealRaisinCrisp -37.9521 NA NA NA
CerealsPostNat.RaisinBran -30.5624 NA NA NA
CerealsPuffedRice -7.6469 NA NA NA
CerealsQuakerOatmeal -17.5746 NA NA NA
CerealsRaisinBran -29.1438 NA NA NA
CerealsRaisinSquares -13.0698 NA NA NA
CerealsRiceChex -26.4040 NA NA NA
CerealsRiceKrispies -27.8428 NA NA NA
CerealsShreddedWheat -0.1671 NA NA NA
CerealsShreddedWheatnBran 6.0700 NA NA NA
CerealsShreddedWheatspoonsize 4.3988 NA NA NA
CerealsSmacks -37.1729 NA NA NA
CerealsSpecialK -15.2716 NA NA NA
CerealsStrawberryFruitWheats -9.0390 NA NA NA
CerealsTotalCornFlakes -29.5632 NA NA NA
CerealsTotalRaisinBran -39.8102 NA NA NA
CerealsTotalWholeGrain -21.7441 NA NA NA
CerealsTrix -40.6497 NA NA NA
CerealsWheatChex -18.6155 NA NA NA
CerealsWheaties -16.8108 NA NA NA
calories NA NA NA NA
protein NA NA NA NA
fat NA NA NA NA
sodium NA NA NA NA
fiber NA NA NA NA
carbo NA NA NA NA
sugars NA NA NA NA
potass NA NA NA NA
vitamins NA NA NA NA
shelf NA NA NA NA

Residual standard error: NaN on 0 degrees of freedom


Multiple R-squared: 1, Adjusted R-squared: NaN
F-statistic: NaN on 54 and 0 DF, p-value: NA

>
>
>
> vif(reg_model)
Error in vif.default(reg_model) :
there are aliased coefficients in the model
>
>
>
> reg_model = lm(rating~calories+protein+fat+sodium+fiber+carbo+sugars+potass+vitamins, data=dt)
>
>
>
> summary(reg_model)

Call:
lm(formula = rating ~ calories + protein + fat + sodium + fiber +
carbo + sugars + potass + vitamins, data = dt)

Residuals:
Min 1Q Median 3Q Max
-0.153533 -0.021059 -0.004076 0.023421 0.123117

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.477e+01 5.243e-02 1044.6 <2e-16 ***
calories -2.308e-01 9.196e-04 -250.9 <2e-16 ***
protein 3.304e+00 9.740e-03 339.2 <2e-16 ***
fat -1.618e+00 1.120e-02 -144.4 <2e-16 ***
sodium -5.449e-02 9.159e-05 -595.0 <2e-16 ***
fiber 3.465e+00 8.855e-03 391.3 <2e-16 ***
carbo 1.134e+00 3.637e-03 311.7 <2e-16 ***
sugars -6.871e-01 3.836e-03 -179.1 <2e-16 ***
potass -3.451e-02 3.146e-04 -109.7 <2e-16 ***
vitamins -5.132e-02 3.475e-04 -147.7 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05008 on 45 degrees of freedom


Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 5.094e+05 on 9 and 45 DF, p-value: < 2.2e-16

>
>
>
> anova(reg_model)
Analysis of Variance Table

Response: rating
Df Sum Sq Mean Sq F value Pr(>F)
calories 1 5661.3 5661.3 2257215 < 2.2e-16 ***
protein 1 2491.8 2491.8 993511 < 2.2e-16 ***
fat 1 465.9 465.9 185749 < 2.2e-16 ***
sodium 1 385.5 385.5 153691 < 2.2e-16 ***
fiber 1 568.4 568.4 226620 < 2.2e-16 ***
carbo 1 1689.3 1689.3 673523 < 2.2e-16 ***
sugars 1 149.1 149.1 59434 < 2.2e-16 ***
potass 1 31.8 31.8 12690 < 2.2e-16 ***
vitamins 1 54.7 54.7 21818 < 2.2e-16 ***
Residuals 45 0.1 0.0
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
>
>
> vif(reg_model)
calories protein fat sodium fiber carbo sugars potass
6.905801 2.781245 2.788454 1.435114 10.411783 5.585179 6.811717 10.413898
vitamins
1.243795
>
>
>
> vif(reg_model)
calories protein fat sodium fiber carbo sugars potass
6.905801 2.781245 2.788454 1.435114 10.411783 5.585179 6.811717 10.413898
vitamins
1.243795
>
>
>
> dw = read.csv("wine.csv")
>
>
>
> head(dw)
Hedonic Meat Dessert Price Sugar Alcohol Acidity
1 14 7 8 7 7 13 7
2 10 7 6 4 3 14 7
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
>
>
>
> tail(dw)
Hedonic Meat Dessert Price Sugar Alcohol Acidity
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
7 13 7 8 6 7 13 7
8 14 7 7 7 7 13 6
>
>
>
> dw
Hedonic Meat Dessert Price Sugar Alcohol Acidity
1 14 7 8 7 7 13 7
2 10 7 6 4 3 14 7
3 8 5 5 10 5 12 5
4 2 4 7 16 7 11 3
5 6 2 4 13 3 10 3
6 5 2 3 11 5 11 3
7 13 7 8 6 7 13 7
8 14 7 7 7 7 13 6
>
>
>
> names(x = dw)
[1] "Hedonic" "Meat" "Dessert" "Price" "Sugar" "Alcohol" "Acidity"
>
>
>
> cor(dw[-4])
Hedonic Meat Dessert Sugar Alcohol Acidity
Hedonic 1.0000000 0.8241888 0.5995401 0.3220453 0.7717847 0.8917644
Meat 0.8241888 1.0000000 0.8295614 0.4151733 0.9379429 0.9464033
Dessert 0.5995401 0.8295614 1.0000000 0.6963106 0.6257681 0.6957637
Sugar 0.3220453 0.4151733 0.6963106 1.0000000 0.2079616 0.2351093
Alcohol 0.7717847 0.9379429 0.6257681 0.2079616 1.0000000 0.9429499
Acidity 0.8917644 0.9464033 0.6957637 0.2351093 0.9429499 1.0000000
>
>
>
> ?prcomp
> prcomp
function (x, ...)
UseMethod("prcomp")
<bytecode: 0x000000000b3cb878>
<environment: namespace:stats>
>
>
> pca = prcomp(dw[-4], scale. = T, center = T)
>
>
>
> class(pca)
[1] "prcomp"
>
>
>
> pca
Standard deviations (1, .., p=6):
[1] 2.10785278 1.04405376 0.53783271 0.38494941 0.14726536 0.08815476

Rotation (n x k) = (6 x 6):
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
> pca
Standard deviations (1, .., p=6):
[1] 2.10785278 1.04405376 0.53783271 0.38494941 0.14726536 0.08815476

Rotation (n x k) = (6 x 6):
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
>
>
>
> names(pca)
[1] "sdev" "rotation" "center" "scale" "x"
>
>
>
> pca$rotation
PC1 PC2 PC3 PC4 PC5 PC6
Hedonic -0.4176342 0.17506445 0.79231208 0.23794689 -0.24087149 0.22921147
Meat -0.4670307 0.06174102 -0.23708912 -0.04893644 -0.57070620 -0.62750199
Dessert -0.4023722 -0.39947102 -0.40752337 0.62405393 0.01666563 0.35031865
Sugar -0.2337133 -0.80561148 0.25703966 -0.45217727 0.13388259 -0.08893831
Alcohol -0.4319072 0.29350232 -0.28901068 -0.58518128 -0.04601834 0.54701975
Acidity -0.4521005 0.26610133 0.01893292 0.06813069 0.77198146 -0.35190848
>
>
>
> pca$center
Hedonic Meat Dessert Sugar Alcohol Acidity
9.000 5.125 6.000 5.500 12.125 5.125
>
>
>
> pca$scale
Hedonic Meat Dessert Sugar Alcohol Acidity
4.503967 2.232071 1.851640 1.772811 1.356203 1.885092
>
>
>
> summary(pca)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 2.1079 1.0441 0.53783 0.3849 0.14727 0.08815
Proportion of Variance 0.7405 0.1817 0.04821 0.0247 0.00361 0.00130
Cumulative Proportion 0.7405 0.9222 0.97039 0.9951 0.99870 1.00000
>
>
>
> screeplot(pca)
>
>
>
> screeplot(pca, type = "line")
>
>
>
> eigen = pca$sdev^2
>
>
>
> eigen
[1] 4.443043321 1.090048251 0.289264029 0.148186050 0.021687086 0.007771263
>
>
>
> pca$x
PC1 PC2 PC3 PC4 PC5 PC6
[1,] -2.2166472 -0.41286870 0.29008612 0.20472286 0.122631407 0.03337577
[2,] -1.2022717 1.89725296 -0.76645805 -0.09189011 -0.017461726 0.05544278
[3,] 0.4718895 0.35592856 0.01033836 -0.21016853 -0.008268467 -0.20694285
[4,] 1.3373322 -1.74401299 -0.89610792 0.01790454 -0.047770682 0.01690521
[5,] 2.8826155 0.60464796 0.31339441 0.71372678 -0.045470511 0.01247330
[6,] 2.6105150 0.08908048 0.43444487 -0.61774126 0.116117052 0.07539924
[7,] -2.1239213 -0.45173765 0.11417182 0.15189234 0.176111264 -0.01751526
[8,] -1.7595119 -0.33829063 0.50013039 -0.16844662 -0.295888339 0.03086182
>
>
>
> scatterplot(dw[-4])
Error in scatterplot.default(dw[-4]) :
argument "y" is missing, with no default
> scatterplotMatrix(dw[-4])
Warning messages:
1: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
2: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
>
>
>
> scatterplotMatrix(pca$x)
>
>
>
> scatterplotMatrix(dw[-4])
Warning messages:
1: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
2: In smoother(x[subs], y[subs], col = smoother.args$col[i], log.x = FALSE, :
could not fit positive part of the spread
>
>
>
> scatterplotMatrix(pca$x)
>
>
>
> biplot(pca)
>
>
>

Anda mungkin juga menyukai