Anda di halaman 1dari 8

Session-1 (Intro & Quick start)

# working dir
# NOTE(review): hard-coded absolute path; it must exist on the machine running this script.
setwd("D:/R-BA")
# clear environment & history
# remove every object that ls() reports in the current environment
rm(list = ls())
# write a temporary file, load it as the command history, then delete the file
write("", file=".blank")
loadhistory(".blank")
unlink(".blank")
# Basic inspection functions, listed by name for reference.
# Each line is a bare function name without call parentheses.
print
class
length
is.numeric
is.integer
is.character
is.vector
# vector of integers
c1 <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)
# vector of numeric / real numbers
c2 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
# vector of numeric / real numbers
c3 <- c(1.1, 2.2, 3.3, 4.4, 5.5)
# vector of integers with :
c4 <- c(1:20)
# vector of string
c5 <- c("aaa", "bbb", "ccc", "ddd", "xxx", "yyy", "zzz")
# type-conversion functions, listed by name for reference
as.numeric
as.logical
as.character

# element-wise arithmetic between a vector and a single number;
# each statement overwrites c3 with the new result
c3 <- c1 + 2
c3 <- c1 - 2
c3 <- c1 * 2
c3 <- c1 / 2

# recycling rule - if two vectors are of unequal length, the shorter
# vector will be recycled in order to match the longer vector
# list
# NOTE(review): x and y are not defined anywhere above this line in the visible script - confirm where they come from.
lst <- list(c1,c2,x,y)
is.list
# factors
# vct holds only the values 0 and 1; labels are applied in sorted level order,
# so 0 is labelled "private" and 1 is labelled "public"
vct <- c(1,0,1,0,0,0,0,1,1,1)
vct.f <- factor(vct, labels = c("private", "public"))

is.factor
# Matrices
# build a 2 x 3 matrix directly; the values fill it column by column
m <- matrix(1:6, nrow = 2, ncol = 3)

# how matrix is populated using a vector & dim()
# start from a plain vector of 10 values: dim() can only reshape an object
# whose length equals the product of the new dimensions (2 * 5 = 10)
m <- 1:10
dim(m) <- c(2, 5)

# NOTE(review): m1 is not defined in the visible part of this script; it must
# be created before this section is run - confirm its intended value.
i <- 2L
# element-wise arithmetic between a matrix and a single number;
# each statement overwrites m2 with the new result
m2 <- m1 + i
m2 <- m1 - i
m2 <- m1 * i
m2 <- m1 / i

# Matrices Multiplication - works when ncol(m1) equals nrow(m2)
m3 <- m1 %*% m2

# Matrices Division - R has no matrix-division operator;
# %/% is element-wise integer division
m3 <- m1 %/% m2
# data frames
dfr <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
class(dfr)
names(dfr)
row.names(dfr)
# gives information of the whole table: $names, $row.names, $class
attributes(dfr)
nrow(dfr)
ncol(dfr)
# Subsetting
x <- c("a", "b", "c", "d", "e", "f", "a")
x[1]
x[2]
x[1:4]
# logical subsetting: keep the elements that sort after "c"
x[x > "c"]
# 9-row data frame; the last value of bar is NA
dfr <- data.frame(
  foo = 1:9,
  bar = c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, NA),
  buf = rep("string", 9)
)
rep("string", 9)
# subsetting dataframes cols
dfr$foo
# show column foo
dfr$bar
# show column bar
dfr[,1]
# get the first col
dfr[,2]
# get the second col
dfr[,ncol(dfr)]
# get last col as ncol will give the total no of col
# subsetting dataframes row
dfr[1,]
# get the first row
dfr[3,]
# get the third row
dfr[nrow(dfr),]
# get last row

dfr["1",]
# get the row with name '1'
dfr['2',]
# get the row with name '2'

dfr[1:2,]
# get the rows 1 to 2
dfr[-1,]
# get all rows except 1

dfr[-3:-7,]
# get all rows except 3 to 7 (rows 1 to 2 and 8 to 9 when dfr has 9 rows)
dfr[,1:2]
# get the cols 1 to 2
dfr[,-1]
# get all cols except 1
dfr[,-2:-3]
# get all cols except 2 to 3
dfr[,-3:-3]
# get all cols except 3

# draw 10 random values uniformly between 0 and 10
x <- runif(10, min=0, max=10)


x
length(x)
summary(x)
sum(x)
min(x)
max(x)
median(x)
# NOTE: mode() reports how x is stored (e.g. "numeric"), not the most frequent value
mode(x)
sd(x)
# rounding helpers
round(x)
floor(x)
ceiling(x)
trunc(x)

Session-2 (Reading & Checking data)


# check NA, NaN, Inf
numX <- NA
is.na(numX)
# check NaN

numX <- 0 / 0
# is.na() is also TRUE for NaN
is.na(numX)
is.nan(numX)
# check Inf
numX <- 1 / 0
# this will be FALSE: an infinite value is not a missing value
is.na(numX)
is.infinite(numX)
# cleaning NAs from single vector
# NOTE(review): vciX and vcsY are not defined in the visible part of this
# script; they must exist before this section is run.
# logical vector: TRUE where vciX is NA, FALSE otherwise
vcbBad <- is.na(vciX)
# keep only the non-missing values of vciX
vciX[!vcbBad]
# TRUE at the positions where neither vciX nor vcsY is missing
vcbGood <- complete.cases(vciX, vcsY)
# values of vciX at the positions that are complete in both vectors
vciX[vcbGood]
# values of vcsY at the positions that are complete in both vectors
vcsY[vcbGood]
# Basic Functions with Data Frames
names(dfr)
head(dfr)
tail(dfr)
# length() of a data frame counts its columns; length() of one column counts its values
length(dfr)
length(dfr$foo)
nrow(dfr)
ncol(dfr)
attributes(dfr)
# NOTE(review): if dfr is still the 9-row frame whose bar column ends in NA,
# these summaries return NA unless na.rm = TRUE is passed - confirm intent.
sum(dfr$bar)
min(dfr$bar)
max(dfr$bar)
median(dfr$bar)
# NOTE: mode() reports the storage type of the column, not the most frequent value
mode(dfr$bar)
sd(dfr$bar)
summary(dfr)
# one-row data frame holding sd() of each column
# NOTE(review): buf was built from the text "string", so sd() has no numeric data there - confirm this is intended.
data.frame(foo.sd=sd(dfr$foo),bar.sd=sd(dfr$bar),buf.sd=sd(dfr$buf))
# Ready To Use R Datasets
library(datasets)
# list the datasets that are available
data()
# use airquality dataset
airquality
head(airquality)
nrow(airquality)

# get dataset of complete rows
# logical vector: TRUE for every row of airquality that has no missing value
vcbGoodAir <- complete.cases(airquality)
# keep only those complete rows as a new data frame
dfrGoodAir <- airquality[vcbGoodAir, ]
# use read.csv
# the path is relative to the current working directory
dfrNifty <- read.csv("./data/nifty-data.csv", header=T, stringsAsFactors=F)
head(dfrNifty)
View(dfrNifty)
attributes(dfrNifty)
summary(dfrNifty)

nrow(dfrNifty)
ncol(dfrNifty)
# readLines
# read the text file into a character vector, one element per line
vcsUNProfile <- readLines("./data/un-profile.txt")
head(vcsUNProfile)
length(vcsUNProfile)
# view files
file.show("session-1.r")
file.show("./data/un-profile.txt")
file.show("./data/nifty-data.csv")
# readLines to read URL
# open a read-mode connection, read all lines from it, then close it
conGoogle <- url("http://www.google.com/", "r")
vcsGoogle <- readLines(conGoogle)
close(conGoogle)
length(vcsGoogle)
head(vcsGoogle,10)
vcsGoogle[7]
#
#
#
#
#
#
#
#
#

Control Structures
Control Structures if, else (Testing a condition)
Control Structures for (execute a loop fixed number of times)
Control Structures while (execute a loop while a condition is true)
Control Structures repeat (execute an infinite loop)
Control Structures break (break the execution of the loop)
Control Structures next (skip an iteration of a loop)
Control Structures return (exit a function)
User Defined Function

# simple function without return statement


addNumbers <- function(numA, numB) {
  # Add two numbers. There is no return(): the value of the last
  # evaluated expression is what the function gives back.
  numA + numB
}
addNumbers(1,2)
# simple function with two return statement
addNumbers <- function(numA, numB = 0) {
  # Add two numbers; numB is optional and defaults to 0.
  # First exit: when both inputs are zero the result is NA.
  bothZero <- (numA == 0) && (numB == 0)
  if (bothZero) {
    return(NA)
  }
  # Second exit: the ordinary sum.
  total <- numA + numB
  return(total)
}
addNumbers(0)
addNumbers(1)
addNumbers(1,2)

Session-3 (Writing data / Date-time functions)


# write csv
# row.names=F leaves the row names out of the written file
write.csv(dfr,"filename.csv",row.names=F)
file.show("filename.csv")
# write lines after reading text file
# NOTE(review): absolute path tied to one machine - confirm it exists where this runs
vcsUNProfile <- readLines("G:/NMIMS/Sem 1/R/R-BA/data/un-profile.txt")
# keep only the first 50 characters of every line
vcsUNProfile <- substr(vcsUNProfile,1,50)
writeLines(vcsUNProfile,"filename.txt")
file.show("filename.txt")

# write rds (R Data Set)
# every column is generated with 100 values so nothing is silently recycled
dfr <- data.frame(
  foo = 1:100,
  bar = runif(100, min = 0, max = 10),
  buf = rnorm(100)
)
saveRDS(dfr, "filename.rds")
# an .rds file is not plain text, so read it back with readRDS() to inspect it
head(readRDS("filename.rds"))
# date yyyy-mm-dd hh:mm:ss
# as.Date() keeps only the date part; the time of day in the string is dropped
datX <- as.Date("2015-10-01 13:45:32")
class(datX)
# POSIXlt yyyy-mm-dd hh:mm:ss
xltX <- as.POSIXlt("2015-10-01 13:45:32")
class(xltX)
# POSIXct yyyy-mm-dd hh:mm:ss
xctX <- as.POSIXct("2015-10-01 13:45:32")
class(xctX)
# current date-time
Sys.time()
class(Sys.time())
# extract date / time using format()
# standard date format
format(Sys.time(), "%c")
# mm/dd/yy format
format(Sys.time(), "%D")
# yyyy-mm-dd - iso 8601 format
format(Sys.time(), "%F")
# day of week (abbreviated, then full name)
format(Sys.time(), "%a")
format(Sys.time(), "%A")
# day of month
format(Sys.time(), "%d")
# month (number, abbreviated name, full name)
format(Sys.time(), "%m")
format(Sys.time(), "%b")
format(Sys.time(), "%B")
# year (two digits, then four digits)
format(Sys.time(), "%y")
format(Sys.time(), "%Y")
# full date
format(Sys.time(), "%a %d-%b-%Y")
format(Sys.time(), "%a %d-%m-%Y")

# extract time using format()
# time of day in the locale's format


format(Sys.time(), "%X")
# time to second accuracy
format(Sys.time(), "%H:%M:%S")
# time to sub-second accuracy (if supported by the OS)
format(Sys.time(), "%H:%M:%OS3")
# locale-specific version of date / time
format(Sys.time(), "%a %b %d %Y %X %Z")

Session-4 (Data manipulation using dplyr)

# one-time setup: download the packages used in this session
install.packages('dplyr')
install.packages('tidyr')
install.packages('data.table')
# attach the packages so that data.table(), filter() and slice() are found;
# dplyr is attached last so its verbs take precedence over same-named functions
library(data.table)
library(dplyr)
# NOTE(review): absolute path tied to one machine - confirm it exists where this runs
dfrNifty <- read.csv("G:/NMIMS/Sem 1/R/R-BA/data/nifty-data.csv", header = TRUE,
                     stringsAsFactors = FALSE)
dfrNifty <- data.table(dfrNifty)
# filter
dfrNifty.ACC <- filter(dfrNifty, Symbol == "ACC")
nrow(dfrNifty.ACC)
View(dfrNifty.ACC)
# subset of rows based on AND condition
# NOTE(review): the column name DateDate is taken as written - confirm it against the CSV header
dfrNifty.Filt <- filter(dfrNifty, (dfrNifty$DateDate >= "2014-12-01" &
                                     dfrNifty$DateDate <= "2014-12-05"))
# subset of rows based on OR condition
dfrNifty.Filt <- filter(dfrNifty, (dfrNifty$DateDate == "2014-12-01" |
                                     dfrNifty$DateDate == "2014-12-05"))
# subset of rows based on "starts-with" condition ... start search string with ^
dfrNifty.Tmp <- slice(dfrNifty, 1:50)
dfrNifty.Filt <- filter(dfrNifty.Tmp, grepl("^TATA", dfrNifty.Tmp$NameOfTheSecurityInNse))
# subset of rows based on "ends-with" condition ... end search string with $
dfrNifty.Tmp <- slice(dfrNifty, 1:50)
dfrNifty.Filt <- filter(dfrNifty.Tmp, grepl("LTD$", dfrNifty.Tmp$NameOfTheSecurityInNse))
# subset of rows based on "contains" condition ... end search string with +
# NOTE(review): the leading ! keeps the rows whose Symbol does NOT match;
# remove it if a plain "contains" filter is intended
dfrNifty.Tmp <- slice(dfrNifty, 1:50)
dfrNifty.Filt <- filter(dfrNifty.Tmp, !grepl("BANK+", dfrNifty.Tmp$Symbol))
# subset of rows by position or row-range ... last 10
fr <- as.integer(nrow(dfrNifty) - 9)
to <- nrow(dfrNifty)
dfrNifty.Slcd <- slice(dfrNifty, fr:to)

Anda mungkin juga menyukai