Anda di halaman 1 dari 5

Pertemuan-14.

LENOVO

2022-06-20

library (readr)
# Read the HR dataset from a CSV file (path is relative to the working
# directory) into a data frame, then preview its first rows.
mydata <- utils::read.csv(file = "hr_dataset_v16.csv")
utils::head(mydata)

## marital emp_stat department perfscore salary gender citizen race engagement


## 1 0 0 5 4 62506 1 1 1 4.60
## 2 1 1 3 3 104437 1 1 1 4.96
## 3 1 1 5 3 64955 0 1 1 3.02
## 4 1 0 5 3 64991 0 1 1 4.84
## 5 0 1 5 3 50825 0 1 1 5.00
## 6 0 0 5 4 57568 0 1 1 5.00
## satisfaction num_project absences
## 1 5 0 1
## 2 3 6 17
## 3 3 0 3
## 4 5 0 15
## 5 4 0 2
## 6 5 0 15

# To see the names of the columns that are in our data


# Keep only the variables used in the correlation analysis.
# Columns are selected by name instead of by position (previously 4, 5, 10
# and 12) so the subset stays correct if the column order of the CSV changes,
# and fails loudly if one of the columns is missing.
data1 <- mydata[, c("perfscore", "salary", "satisfaction", "absences")]
head(data1)

## perfscore salary satisfaction absences


## 1 4 62506 5 1
## 2 3 104437 3 17
## 3 3 64955 3 3
## 4 3 64991 5 15
## 5 3 50825 4 2
## 6 4 57568 5 15

# Spearman rank-correlation matrix for every pair of columns in data1,
# displayed rounded to three decimal places.
correlation1 <- cor(x = data1, method = "spearman")
round(correlation1, digits = 3)

## perfscore salary satisfaction absences


## perfscore 1.000 0.088 0.209 0.022
## salary 0.088 1.000 0.040 0.079
## satisfaction 0.209 0.040 1.000 0.074
## absences 0.022 0.079 0.074 1.000

1
library (Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## Loading required package: ggplot2

##
## Attaching package: ’Hmisc’

## The following objects are masked from ’package:base’:


##
## format.pval, units

# Spearman correlations via Hmisc's rcorr(), which is given the data as a
# matrix. Printing the result shows the correlation matrix together with the
# number of observations and the matrix of p-values.
correlation3 <- rcorr(
  x = as.matrix(data1),
  type = "spearman"
)
correlation3

## perfscore salary satisfaction absences


## perfscore 1.00 0.09 0.21 0.02
## salary 0.09 1.00 0.04 0.08
## satisfaction 0.21 0.04 1.00 0.07
## absences 0.02 0.08 0.07 1.00
##
## n= 311
##
##
## P
## perfscore salary satisfaction absences
## perfscore 0.1207 0.0002 0.6970
## salary 0.1207 0.4792 0.1636
## satisfaction 0.0002 0.4792 0.1949
## absences 0.6970 0.1636 0.1949

# Regression ----
# Preview the full dataset again before choosing the regression variables.
utils::head(mydata)

## marital emp_stat department perfscore salary gender citizen race engagement


## 1 0 0 5 4 62506 1 1 1 4.60
## 2 1 1 3 3 104437 1 1 1 4.96
## 3 1 1 5 3 64955 0 1 1 3.02
## 4 1 0 5 3 64991 0 1 1 4.84
## 5 0 1 5 3 50825 0 1 1 5.00
## 6 0 0 5 4 57568 0 1 1 5.00
## satisfaction num_project absences
## 1 5 0 1
## 2 3 6 17
## 3 3 0 3
## 4 5 0 15
## 5 4 0 2
## 6 5 0 15

2
# Keep only the variables used in the regression models.
# Columns are selected by name instead of by position (previously 1, 2, 3, 4,
# 5, 7, 11 and 12) so the subset stays correct if the column order of the CSV
# changes, and fails loudly if one of the columns is missing.
data2 <- mydata[, c(
  "marital", "emp_stat", "department", "perfscore",
  "salary", "citizen", "num_project", "absences"
)]
head(data2)

## marital emp_stat department perfscore salary citizen num_project absences


## 1 0 0 5 4 62506 1 0 1
## 2 1 1 3 3 104437 1 6 17
## 3 1 1 5 3 64955 1 0 3
## 4 1 0 5 3 64991 1 0 15
## 5 0 1 5 3 50825 1 0 2
## 6 0 0 5 4 57568 1 0 15

# Ordinary least squares: salary (in levels) regressed on the remaining
# variables in data2.
# The original statement was cut off after "num_project +", leaving the call
# unclosed; the final term and the data argument are restored here.
regression2 <- lm(
  formula = salary ~ marital + emp_stat + department + perfscore +
    citizen + num_project + absences,
  data = data2
)
summary(regression2)

##
## Call:
## lm(formula = salary ~ marital + emp_stat + department + perfscore +
## citizen + num_project + absences, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47374 -8994 -1788 5353 178605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66688.5 13106.1 5.088 6.35e-07 ***
## marital -896.7 2539.6 -0.353 0.7243
## emp_stat -1560.8 2658.9 -0.587 0.5576
## department -3015.9 1852.2 -1.628 0.1045
## perfscore 4030.8 2106.7 1.913 0.0566 .
## citizen -4603.5 5545.3 -0.830 0.4071
## num_project 4298.1 852.8 5.040 8.03e-07 ***
## absences 414.6 211.7 1.958 0.0511 .
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##
## Residual standard error: 21500 on 303 degrees of freedom
## Multiple R-squared: 0.2862, Adjusted R-squared: 0.2697
## F-statistic: 17.35 on 7 and 303 DF, p-value: < 2.2e-16

# Same specification as regression2, but with log(salary) as the response.
# The original statement was cut off in the middle of "num_project", leaving
# the call unclosed; the remaining terms and the data argument are restored.
regression3 <- lm(
  formula = log(salary) ~ marital + emp_stat + department + perfscore +
    citizen + num_project + absences,
  data = data2
)
summary(regression3)

##
## Call:
## lm(formula = log(salary) ~ marital + emp_stat + department +
## perfscore + citizen + num_project + absences, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61677 -0.12360 0.00452 0.09766 1.37186
##

3
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.984599 0.137722 79.759 < 2e-16 ***
## marital -0.004698 0.026687 -0.176 0.8604
## emp_stat -0.011917 0.027940 -0.427 0.6700
## department -0.013941 0.019463 -0.716 0.4744
## perfscore 0.042884 0.022138 1.937 0.0537 .
## citizen -0.066153 0.058271 -1.135 0.2572
## num_project 0.061048 0.008962 6.812 5.19e-11 ***
## absences 0.004284 0.002225 1.925 0.0551 .
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##
## Residual standard error: 0.2259 on 303 degrees of freedom
## Multiple R-squared: 0.3436, Adjusted R-squared: 0.3284
## F-statistic: 22.66 on 7 and 303 DF, p-value: < 2.2e-16

# Log-salary model extended with a department x perfscore interaction.
# The original statement was cut off in the middle of "department*perfscore",
# leaving the call unclosed; the remaining terms and the data argument are
# restored here.
# Note: in an R formula `department * perfscore` already expands to both main
# effects plus their interaction, so listing the main effects separately is
# redundant but harmless; the formula is kept as originally written.
regression4 <- lm(
  formula = log(salary) ~ marital + emp_stat + department + perfscore +
    department * perfscore + citizen + num_project + absences,
  data = data2
)

summary(regression4)

##
## Call:
## lm(formula = log(salary) ~ marital + emp_stat + department +
## perfscore + department * perfscore + citizen + num_project +
## absences, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66717 -0.12179 0.00391 0.10071 1.36482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.126900 0.429394 23.584 < 2e-16 ***
## marital -0.003416 0.026543 -0.129 0.8977
## emp_stat -0.009454 0.027807 -0.340 0.7341
## department 0.161031 0.085248 1.889 0.0599 .
## perfscore 0.336358 0.140980 2.386 0.0177 *
## citizen -0.075725 0.058121 -1.303 0.1936
## num_project 0.059232 0.008953 6.616 1.68e-10 ***
## absences 0.004164 0.002213 1.882 0.0609 .
## department:perfscore -0.059357 0.028164 -2.108 0.0359 *
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##
## Residual standard error: 0.2246 on 302 degrees of freedom
## Multiple R-squared: 0.3531, Adjusted R-squared: 0.336
## F-statistic: 20.6 on 8 and 302 DF, p-value: < 2.2e-16

# Logit regression ----
# Logistic regression (binomial family) of emp_stat on salary and the other
# variables in data2.
# The original statement was cut off in the middle of "absences", leaving the
# call unclosed; the last term plus the family and data arguments are
# restored here.
logit1 <- glm(
  formula = emp_stat ~ salary + marital + department + perfscore +
    citizen + num_project + absences,
  family = "binomial",
  data = data2
)
summary(logit1)

##

4
## Call:
## glm(formula = emp_stat ~ salary + marital + department + perfscore +
## citizen + num_project + absences, family = "binomial", data = data2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6375 -0.9207 -0.7319 1.2880 2.1791
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.323e+00 1.379e+00 1.685 0.0921 .
## salary -4.320e-06 6.196e-06 -0.697 0.4856
## marital 2.705e-01 2.547e-01 1.062 0.2882
## department -3.006e-01 1.898e-01 -1.583 0.1133
## perfscore -3.217e-01 2.104e-01 -1.529 0.1263
## citizen -6.666e-01 5.314e-01 -1.254 0.2097
## num_project -2.423e-01 9.731e-02 -2.490 0.0128 *
## absences 3.891e-02 2.168e-02 1.795 0.0727 .
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 396.37 on 310 degrees of freedom
## Residual deviance: 377.72 on 303 degrees of freedom
## AIC: 393.72
##
## Number of Fisher Scoring iterations: 4

# Confusion matrix for the logit model: predicted class (rows) against the
# observed emp_stat (columns).
actual_response <- data2$emp_stat

# Turn the fitted probabilities into 0/1 class predictions by rounding,
# which amounts to a 0.5 cut-off.
predicted_response <- round(fitted(logit1))

# Fix the levels to 0 and 1 on both axes so the table is always 2 x 2, even
# when the model never predicts one of the classes (a plain table() call
# would silently drop that row or column). The argument names keep the same
# dimension labels as before.
outcomes <- table(
  predicted_response = factor(predicted_response, levels = c(0, 1)),
  actual_response = factor(actual_response, levels = c(0, 1))
)
outcomes

## actual_response
## predicted_response 0 1
## 0 200 94
## 1 7 10

Anda mungkin juga menyukai