
CHEAT SHEET

Machine Learning Algorithms
(Python and R Codes)

Types

Supervised Learning: Linear Regression, Logistic Regression, Decision Tree, Random Forest, kNN
Unsupervised Learning: Apriori algorithm, k-means, Hierarchical Clustering
Reinforcement Learning: Markov Decision Process, Q Learning

Linear Regression

Python Code

#Import Library
#Import other necessary libraries like pandas,
#numpy...
from sklearn import linear_model
#Load Train and Test datasets
#Identify feature and response variable(s) and
#values must be numeric and numpy arrays
x_train = input_variables_values_training_datasets
y_train = target_variables_values_training_datasets
x_test = input_variables_values_test_datasets
#Create linear regression object
linear = linear_model.LinearRegression()
#Train the model using the training sets and
#check score
linear.fit(x_train, y_train)
linear.score(x_train, y_train)
#Equation coefficient and Intercept
print('Coefficient: \n', linear.coef_)
print('Intercept: \n', linear.intercept_)
#Predict Output
predicted = linear.predict(x_test)

R Code

#Load Train and Test datasets
#Identify feature and response variable(s) and
#values must be numeric
x_train <- input_variables_values_training_datasets
y_train <- target_variables_values_training_datasets
x_test <- input_variables_values_test_datasets
x <- cbind(x_train, y_train)
#Train the model using the training sets and
#check score
linear <- lm(y_train ~ ., data = x)
summary(linear)
#Predict Output
predicted <- predict(linear, x_test)
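A minimal runnable sketch of the Python template above, with the dataset placeholders filled by tiny synthetic arrays (the numbers are illustrative only, not from the cheat sheet):

import numpy as np
from sklearn import linear_model

#Toy data roughly following y = 2x + 1
x_train = np.array([[1.0], [2.0], [3.0], [4.0]])
y_train = np.array([3.1, 4.9, 7.2, 9.0])
x_test = np.array([[5.0], [6.0]])

linear = linear_model.LinearRegression()
linear.fit(x_train, y_train)
print('R^2 on training data:', linear.score(x_train, y_train))
print('Coefficient:', linear.coef_)
print('Intercept:', linear.intercept_)
print('Predictions:', linear.predict(x_test))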

Logistic Regression

Python Code

#Import Library
from sklearn.linear_model import LogisticRegression
#Assumed you have, X (predictor) and Y (target)
#for training data set and x_test(predictor)
#of test_dataset
#Create logistic regression object
model = LogisticRegression()
#Train the model using the training sets
#and check score
model.fit(X, y)
model.score(X, y)
#Equation coefficient and Intercept
print('Coefficient: \n', model.coef_)
print('Intercept: \n', model.intercept_)
#Predict Output
predicted = model.predict(x_test)

R Code

x <- cbind(x_train, y_train)
#Train the model using the training sets and check
#score
logistic <- glm(y_train ~ ., data = x, family = 'binomial')
summary(logistic)
#Predict Output
predicted <- predict(logistic, x_test)
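A runnable sketch of the Python template, assuming a toy binary-classification problem (the data below is invented for illustration):

import numpy as np
from sklearn.linear_model import LogisticRegression

#Label is 1 once the single feature exceeds roughly 2.5
X = np.array([[0.5], [1.5], [2.0], [3.0], [3.5], [4.5]])
y = np.array([0, 0, 0, 1, 1, 1])
x_test = np.array([[1.0], [4.0]])

model = LogisticRegression()
model.fit(X, y)
print('Mean accuracy:', model.score(X, y))
print('Predictions:', model.predict(x_test))
print('Class probabilities:', model.predict_proba(x_test))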

Decision Tree

Python Code

#Import Library
#Import other necessary libraries like pandas, numpy...
from sklearn import tree
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of
#test_dataset
#Create tree object
model = tree.DecisionTreeClassifier(criterion='gini')
#for classification, here you can change the
#algorithm as gini or entropy (information gain), by
#default it is gini
#model = tree.DecisionTreeRegressor() for
#regression
#Train the model using the training sets and check
#score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
library(rpart)
x <- cbind(x_train, y_train)
#grow tree
fit <- rpart(y_train ~ ., data = x, method = "class")
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
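A runnable sketch using scikit-learn's bundled iris data in place of the placeholders (the dataset and random_state are illustrative choices):

from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

#iris ships with scikit-learn, so no external files are needed
X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = tree.DecisionTreeClassifier(criterion='gini')
model.fit(X_train, y_train)
print('Training accuracy:', model.score(X_train, y_train))
print('Test accuracy:', model.score(x_test, y_test))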

SVM (Support Vector Machine)

Python Code

#Import Library
from sklearn import svm
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create SVM classification object
model = svm.SVC()
#there are various options associated
#with it, this is simple for classification.
#Train the model using the training sets and check
#score
model.fit(X, y)
model.score(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
library(e1071)
x <- cbind(x_train, y_train)
#Fitting model
fit <- svm(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
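A runnable sketch of the Python template, again borrowing the iris data as a stand-in (the dataset choice is mine, not the cheat sheet's):

from sklearn import svm
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = svm.SVC()  #defaults to an RBF kernel; kernel, C and gamma are tunable
model.fit(X_train, y_train)
print('Test accuracy:', model.score(x_test, y_test))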

Naive Bayes

Python Code

#Import Library
from sklearn.naive_bayes import GaussianNB
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Naive Bayes classification object
model = GaussianNB()
#there is other distribution for multinomial classes
#like Bernoulli Naive Bayes
#Train the model using the training sets and check
#score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
library(e1071)
x <- cbind(x_train, y_train)
#Fitting model
fit <- naiveBayes(y_train ~ ., data = x)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
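A runnable sketch with iris as stand-in data (illustrative only):

from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = GaussianNB()
model.fit(X_train, y_train)
print('Test accuracy:', model.score(x_test, y_test))
#Per-class probabilities for the first test sample
print('Probabilities:', model.predict_proba(x_test[:1]))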

kNN (k-Nearest Neighbors)

Python Code

#Import Library
from sklearn.neighbors import KNeighborsClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create KNeighbors classifier object
model = KNeighborsClassifier(n_neighbors=6)
#default value for n_neighbors is 5
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
#knn() comes from the class package and takes the train and
#test matrices plus the training labels directly
library(class)
#Fit the model and predict output in one step
predicted <- knn(train = x_train, test = x_test, cl = y_train, k = 5)
summary(predicted)
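A runnable sketch with iris stand-in data (the dataset and n_neighbors value are illustrative):

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = KNeighborsClassifier(n_neighbors=6)
model.fit(X_train, y_train)
print('Test accuracy:', model.score(x_test, y_test))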

k-Means

Python Code

#Import Library
from sklearn.cluster import KMeans
#Assumed you have, X (attributes) for training data set
#and x_test(attributes) of test_dataset
#Create KMeans object
k_means = KMeans(n_clusters=3, random_state=0)
#Train the model using the training sets and check score
k_means.fit(X)
#Predict Output
predicted = k_means.predict(x_test)

R Code

#Import Library
library(cluster)
#3 cluster solution
fit <- kmeans(X, 3)
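A runnable sketch clustering two synthetic blobs (the points and the n_init setting are mine; n_init is pinned so the sketch behaves the same across scikit-learn versions):

import numpy as np
from sklearn.cluster import KMeans

#Two well-separated blobs around (0, 0) and (10, 10)
X = np.array([[0.0, 0.0], [0.5, 0.2], [0.2, 0.6],
              [10.0, 10.0], [10.3, 9.8], [9.7, 10.2]])

k_means = KMeans(n_clusters=2, random_state=0, n_init=10)
k_means.fit(X)
print('Cluster labels:', k_means.labels_)
print('Centers:\n', k_means.cluster_centers_)
print('New point cluster:', k_means.predict(np.array([[9.5, 10.0]])))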

Random Forest

Python Code

#Import Library
from sklearn.ensemble import RandomForestClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Random Forest object
model = RandomForestClassifier()
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
library(randomForest)
x <- cbind(x_train, y_train)
#Fitting model
fit <- randomForest(y_train ~ ., data = x, ntree = 500)
summary(fit)
#Predict Output
predicted <- predict(fit, x_test)
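A runnable sketch on iris stand-in data (dataset, n_estimators and random_state are illustrative):

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = RandomForestClassifier(n_estimators=500, random_state=0)
model.fit(X_train, y_train)
print('Test accuracy:', model.score(x_test, y_test))
print('Feature importances:', model.feature_importances_)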

Dimensionality Reduction Algorithms

Python Code

#Import Library
from sklearn import decomposition
#Assumed you have training and test data set as train and
#test
#Create PCA object
pca = decomposition.PCA(n_components=k)
#default value of k = min(n_sample, n_features)
#For Factor analysis
#fa = decomposition.FactorAnalysis()
#Reduced the dimension of training dataset using PCA
train_reduced = pca.fit_transform(train)
#Reduced the dimension of test dataset
test_reduced = pca.transform(test)

R Code

#Import Library
library(stats)
pca <- princomp(train, cor = TRUE)
train_reduced <- predict(pca, train)
test_reduced <- predict(pca, test)
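A runnable sketch with iris features standing in for train/test and k fixed at 2 (both choices are illustrative):

from sklearn import decomposition
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, _ = load_iris(return_X_y=True)
train, test = train_test_split(X, random_state=0)

pca = decomposition.PCA(n_components=2)
train_reduced = pca.fit_transform(train)  #fit on training data only
test_reduced = pca.transform(test)        #reuse the fitted components
print('Explained variance ratio:', pca.explained_variance_ratio_)
print('Reduced shapes:', train_reduced.shape, test_reduced.shape)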

Gradient Boosting & AdaBoost

Python Code

#Import Library
from sklearn.ensemble import GradientBoostingClassifier
#Assumed you have, X (predictor) and Y (target) for
#training data set and x_test(predictor) of test_dataset
#Create Gradient Boosting Classifier object
model = GradientBoostingClassifier(n_estimators=100,
        learning_rate=1.0, max_depth=1, random_state=0)
#Train the model using the training sets and check score
model.fit(X, y)
#Predict Output
predicted = model.predict(x_test)

R Code

#Import Library
library(caret)
x <- cbind(x_train, y_train)
#Fitting model
fitControl <- trainControl(method = "repeatedcv",
                           number = 4, repeats = 4)
fit <- train(y_train ~ ., data = x, method = "gbm",
             trControl = fitControl, verbose = FALSE)
predicted <- predict(fit, x_test, type = "prob")[,2]
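A runnable sketch of the Python template on iris stand-in data, keeping the cheat sheet's hyperparameters:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                   max_depth=1, random_state=0)
model.fit(X_train, y_train)
print('Test accuracy:', model.score(x_test, y_test))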

To view the complete guide on Machine Learning Algorithms, visit:
http://bit.ly/1DOUS8N
www.analyticsvidhya.com
