# Environment setup for the k-means clustering script.
# Run this in a fresh R session instead of wiping the workspace with
# rm(list = ls()), which silently destroys whatever the caller had loaded.

# Report the memory limits. The original first line called `limit()`, which
# is not an R function (presumably memory.limit() was meant -- confirm).
# memory.limit() and memory.size() only do real work on Windows builds of R
# older than 4.2.0; elsewhere they are stubs that warn and return Inf.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  print(memory.limit())
  print(memory.size(max = TRUE))
}

# Install only the packages that are not available yet, rather than
# re-downloading all of them on every run.
required_pkgs <- c("tidyverse", "cluster", "factoextra")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(tidyverse) # data manipulation
library(cluster) # clustering algorithms
library(factoextra) # clustering algorithms & visualization
library(dplyr)

# The input CSV is read with a relative path, so the working directory still
# has to point at the data folder. Only switch when that folder exists, so
# the script does not abort on machines with a different layout.
data_dir <- "C:/users/JOSEPH/Documents/codes"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  warning(
    "Data directory not found: ", data_dir,
    "; keeping the current working directory.",
    call. = FALSE
  )
}
print(getwd())
# Import test data
data <- read.csv("malware_dataset1.csv")
print(summary(data))

# Student: I added the command below because the data could not be scaled
# later in the script, so every column is converted to numeric first.
# NOTE(review): as.numeric() turns text that is not a number into NA (with a
# warning); confirm that is acceptable for the columns used below.
# Assigning into data[] keeps the object a data frame with the same column
# names for any number of rows, which sapply() does not guarantee.
data[] <- lapply(data, as.numeric)

# Keep the columns at positions 13 to 23, then drop every row that has a
# missing value in any of them.
stopifnot(ncol(data) >= 23)
data.1 <- select(data, 13:23)
data1 <- na.omit(data.1)
print(summary(data1))
# As we don't want the clustering algorithm to depend on an arbitrary
# variable unit, we start by scaling the data using the R function scale().
# Columns with zero variance cannot be standardized (dividing by a standard
# deviation of 0 yields NaN, which kmeans() rejects), so drop them first.
data1 <- as.data.frame(data1)
col_sd <- vapply(data1, sd, numeric(1))
is_constant <- is.na(col_sd) | col_sd == 0
if (any(is_constant)) {
  warning(
    "Dropping constant columns before scaling: ",
    paste(names(data1)[is_constant], collapse = ", "),
    call. = FALSE
  )
  data1 <- data1[, !is_constant, drop = FALSE]
}
data1 <- scale(data1)

# Fit k-means on the scaled data.
# NOTE(review): the original script never created the fitted object and does
# not state the number of clusters; 2 clusters, 25 random starts and the seed
# are assumptions -- confirm before relying on the results.
set.seed(123)
n_clusters <- 2
km <- kmeans(data1, centers = n_clusters, nstart = 25)
# The checks below refer to the fit as `cl`; keep both names on one object.
cl <- km

# Plot clusters (print() so the plot also appears when the file is source()d)
print(fviz_cluster(km, data = data1))

## Sum of squared deviations from the column means
ss <- function(x) sum(scale(x, scale = FALSE)^2)

## Cluster centers "fitted" to each observation, and the residuals
fitted.data1 <- fitted(cl)
resid.data1 <- data1 - fitted.data1

## Equalities : ----------------------------------
print(cbind(
  cl[c("betweenss", "tot.withinss", "totss")], # the same two columns
  c(ss(fitted.data1), ss(resid.data1), ss(data1))
))
stopifnot(
  all.equal(cl$totss, ss(data1)),
  all.equal(cl$tot.withinss, ss(resid.data1)),
  ## these three are the same:
  all.equal(cl$betweenss, ss(fitted.data1)),
  all.equal(cl$betweenss, cl$totss - cl$tot.withinss),
  ## and hence also
  all.equal(ss(data1), ss(fitted.data1) + ss(resid.data1))
)