In [4]:
In [5]:
Out[5]:
In [6]:
Out[6]:
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 1/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [7]:
Out[7]:
variety
Setosa 50
Versicolor 50
Virginica 50
dtype: int64
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 2/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [8]:
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 3/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
Out[8]:
variety
Setosa [[AxesSubplot(0.125,0.551739;0.336957x0.328261...
Versicolor [[AxesSubplot(0.125,0.551739;0.336957x0.328261...
Virginica [[AxesSubplot(0.125,0.551739;0.336957x0.328261...
dtype: object
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 4/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 5/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 6/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [9]:
# Step 6. Check whether any data is missing; the result shows that no
# values are missing in any column.
# `isnull` is an alias of `isna`, so one call is enough. The cell displays
# the number of missing values per column.
iris.isna().sum()
Out[9]:
sepal.length 0
sepal.width 0
petal.length 0
petal.width 0
variety 0
dtype: int64
In [18]:
# Names of the four measurement columns used as model inputs.
namafaktor = [
    "sepal.length",
    "sepal.width",
    "petal.length",
    "petal.width",
]
# Feature table (the columns listed above) and the class-label column.
X = iris.loc[:, namafaktor]
y = iris.loc[:, "variety"]
In [10]:
C:\Users\Acer\Anaconda3\lib\site-packages\sklearn\ensemble\weight_boostin
g.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy m
odule and should not be imported. It will be removed in a future NumPy rel
ease.
from numpy.core.umath_tests import inner1d
In [11]:
# Step 15. Initialise the classification models with their default
# parameters and collect them in the model list as (short name, estimator)
# pairs. The order here is the order in which the results are reported.
models = [
    ('KNN', KNeighborsClassifier()),
    ('SVC', SVC()),
    ('LR', LogisticRegression()),
    ('DT', DecisionTreeClassifier()),
    ('GNB', GaussianNB()),
    ('RF', RandomForestClassifier()),
    ('GB', GradientBoostingClassifier()),
]
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 7/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [12]:
In [19]:
In [20]:
# Fit every model on the training split and record its accuracy on the
# test split.
names = []
scores = []
for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))
    names.append(name)

# One row per model: 'Nama' is the model's short name, 'Nilai' its accuracy.
tr_split = pd.DataFrame({'Nama': names, 'Nilai': scores})
print(tr_split)
Nama Nilai
0 KNN 1.000000
1 SVC 1.000000
2 LR 0.973684
3 DT 0.973684
4 GNB 0.973684
5 RF 0.973684
6 GB 0.973684
In [21]:
# Mean 10-fold cross-validation accuracy of every model on the full X / y.
#
# The splitter does not depend on the model, so it is built once. It is
# created without `random_state`: with the default shuffle=False the seed has
# no effect on the folds, and recent scikit-learn releases raise ValueError
# when a seed is passed without shuffle=True.
# NOTE(review): if the rows of `iris` are ordered by class, unshuffled folds
# are not class-balanced. Consider KFold(n_splits=10, shuffle=True,
# random_state=10) or StratifiedKFold; the scores would then differ from the
# unshuffled run, so confirm before switching.
kfold = KFold(n_splits=10)

names = []
scores = []
for name, model in models:
    score = cross_val_score(model, X, y, cv=kfold, scoring='accuracy').mean()
    names.append(name)
    scores.append(score)

# One row per model: 'Nama' is the model's short name, 'Nilai' the mean accuracy.
kf_cross_val = pd.DataFrame({'Nama': names, 'Nilai': scores})
print(kf_cross_val)
Nama Nilai
0 KNN 0.933333
1 SVC 0.953333
2 LR 0.880000
3 DT 0.946667
4 GNB 0.946667
5 RF 0.953333
6 GB 0.926667
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 8/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [22]:
# Display the pending matplotlib figure(s).
# NOTE(review): the plotting calls that belong to this cell are not visible
# in this export; confirm what is being drawn before relying on this cell.
plt.show()
In [23]:
# Importing libraries
import pandas as pd
import numpy as np
import math
import operator
In [24]:
# Start of STEP 1
# Importing data: read iris.csv (path relative to the working directory)
data = pd.read_csv("iris.csv")
# End of STEP 1
# Preview the first rows of the loaded table as the cell output
data.head()
Out[24]:
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 9/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [25]:
distances = {}
sort = {}
length = testInstance.shape[1]
distances[x] = dist[0]
# End of STEP 3.1
neighbors = []
if response in classVotes:
classVotes[response] += 1
else:
classVotes[response] = 1
# End of STEP 3.4
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 10/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [26]:
In [28]:
# Predicted class
print(result)
# Nearest neighbor
print(neigh)
Virginica
[141]
In [29]:
Virginica
[141, 139, 120]
In [30]:
Virginica
[141, 139, 120, 145, 144]
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 11/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [32]:
# Class predicted for the query sample `test`.
predicted_class = neigh.predict(test)
print(predicted_class)

# 3 nearest neighbors: element [1] of the kneighbors() result is printed.
neighbor_info = neigh.kneighbors(test)
print(neighbor_info[1])
['Virginica']
[[141 139 120]]
In [13]:
# Logistic regression
# In this section we load the data again, following on from the
# cross-validation step.
# run_line_magic replaces the deprecated get_ipython().magic(...) call.
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# The frame was previously bound to `irs` while the following lines read
# `iris`, which only worked when `iris` was left over from an earlier cell.
# Bind the name that is actually used so the cell runs on a fresh kernel.
iris = pd.read_csv('iris.csv')
irs = iris  # old name kept bound in case another cell still refers to it

# Preview the first rows as the cell output.
iris.head()
Out[13]:
In [16]:
# Feature matrix: the two selected columns as a NumPy array.
X = iris[['sepal.length', 'petal.length']].values
# Target: the double brackets select a one-column DataFrame, so `.values`
# is 2-D with a single column.
# NOTE(review): if a 1-D label vector is expected downstream, use
# iris['variety'].values instead; confirm against the code that consumes y.
y = iris[['variety']].values
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 12/13
19/2/2019 Analisis Data Iris - Paulina Ade Cahyanti (662016006) & Jessica Ordelia (662016018)
In [17]:
#In this section we standardise the data so that the two variables selected
#above (sepal.length and petal.length) have a mean of 0 and a standard deviation of 1
Rata-rata: (0, 0)
Standard deviasi: (1, 1)
In [18]:
[[-0.90068117 -1.34022653]
[-1.14301691 -1.34022653]
[-1.38535265 -1.39706395]
[-1.50652052 -1.2833891 ]
[-1.02184904 -1.34022653]
[-0.53717756 -1.16971425]
[-1.50652052 -1.34022653]
[-1.02184904 -1.2833891 ]
[-1.74885626 -1.34022653]
[-1.14301691 -1.2833891 ]]
In [ ]:
# Because the logistic regression algorithm was not set up correctly, the process
# could not be continued: a very large number of errors occurred, and the early
# steps of the logistic regression section still need to be reviewed.
file:///C:/Users/Acer/Downloads/Analisis%20Data%20Iris%20-%20Paulina%20Ade%20Cahyanti%20(662016006)%20&%20Jessica%20Ordelia… 13/13