R Functions

Session-1 (Intro & Quick start)
# working dir
# NOTE: hard-coded, machine-specific path; the relative "./data/..." paths
# used later in these notes resolve against it
setwd("D:/R-BA")
# clear environment & history
# remove every object from the current environment
rm(list = ls())
# clear the command history: write a file holding a single empty line,
# load that file as the session history, then delete the temporary file
write("", file=".blank")
loadhistory(".blank")
unlink(".blank")
# basic inspection functions covered in this session; a bare name without ()
# only evaluates to the function itself - call them on an object to use them
print
class
length
is.numeric
is.integer
is.character
is.vector
# vector of integers
c1 <- c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)
# vector of numeric / real numbers
c2 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
# vector of numeric / real numbers
c3 <- c(1.1, 2.2, 3.3, 4.4, 5.5)
# vector of integers built with the : operator
c4 <- c(1:20)
# vector of string
c5 <- c("aaa", "bbb", "ccc", "ddd", "xxx", "yyy", "zzz")
# type-conversion functions (listed by name only; a bare name evaluates to
# the function itself)
as.numeric
as.logical
as.character
# arithmetic between a vector and a single number is applied element-wise;
# each statement overwrites c3 with the result
c3 <- c1 + 2
c3 <- c1 - 2
c3 <- c1 * 2
c3 <- c1 / 2
# recycling rule - if two vectors are of unequal length, the shorter
# vector will be recycled in order to match the longer vector,
# e.g. c(1, 2, 3, 4) + c(10, 20) gives 11 22 13 24
# list
# a list can hold vectors of different types and lengths; x and y are not
# defined at this point of the notes, so the list is built from the
# vectors created above
lst <- list(c1, c2, c3, c5)
is.list
# factors
vct <- c(1, 0, 1, 0, 0, 0, 0, 1, 1, 1)
# levels are taken in sorted order (0, 1), so 0 is labelled "private"
# and 1 is labelled "public"
vct.f <- factor(vct, labels = c("private", "public"))
is.factor
# Matrices
m <- matrix(1:6, nrow = 2, ncol = 3)
# how matrix is populated using a vector & dim()
# dim<- requires prod(dim) to equal the length of the object, so a
# 10-element vector is used for a 2 x 5 matrix (filled column by column)
m <- 1:10
dim(m) <- c(2, 5)
i <- 2L
# m1 is not defined anywhere in the notes; a square matrix is used so that
# the matrix product further down is conformable
m1 <- matrix(1:4, nrow = 2, ncol = 2)
# arithmetic between a matrix and a single number is applied element-wise;
# each statement overwrites m2 with the result
m2 <- m1 + i
m2 <- m1 - i
m2 <- m1 * i
m2 <- m1 / i
# matrix multiplication - works when ncol(m1) equals nrow(m2)
m3 <- m1 %*% m2
# there is no matrix-division operator; %/% is element-wise integer division
m3 <- m1 %/% m2
# data frames
dfr <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
class(dfr)
names(dfr)
row.names(dfr)
# attributes() gives information on the whole table: $names, $row.names, $class
attributes(dfr)
nrow(dfr)
ncol(dfr)
# Subsetting
x <- c("a", "b", "c", "d", "e", "f","a")
# single elements by position (the first element has index 1)
x[1]
x[2]
# a range of positions
x[1:4]
# logical subsetting: keep the elements that compare greater than "c"
x[x > "c"]
dfr <- data.frame(foo = 1:9, bar = c(T, T, F, F,T, T, F, F, NA), buf=rep("string",9))
# rep() repeats its first argument - here "string" nine times
rep("string",9)
# subsetting dataframes cols
dfr$foo
# show column foo
dfr$bar
# show column bar
dfr[,1]
# get the first col
dfr[,2]
# get the second col
dfr[,ncol(dfr)]
# get last col as ncol will give the total no of col
# subsetting dataframes row
dfr[1,]
# get the first row
dfr[3,]
# get the third row
dfr[nrow(dfr),]
# get last row
dfr["1",]
# get the row with name '1'
dfr['2',]
# get the row with name '2'
dfr[1:2,]
# get the rows 1 to 2
dfr[-1,]
# get all rows except 1
dfr[-3:-7,]
# get all rows except 3 to 7 (rows 1 to 2 and 8 to 9 remain)
dfr[,1:2]
# get the cols 1 to 2
dfr[,-1]
# get all cols except 1
dfr[,-2:-3]
# get all cols except 2 to 3
dfr[,-3:-3]
# get all cols except 3 (-3:-3 is the same as -3)
# ten random numbers drawn uniformly between 0 and 10
x <- runif(10, min=0, max=10)

x
length(x)
# minimum, quartiles, mean and maximum in one call
summary(x)
sum(x)
min(x)
max(x)
median(x)
# NOTE: mode() returns the storage mode of x ("numeric"), not the
# statistical mode (most frequent value)
mode(x)
sd(x)
# rounding helpers: nearest integer, round down, round up, drop the fraction
round(x)
floor(x)
ceiling(x)
trunc(x)
Session-2 (Reading & Checking data)

# check NA, NaN, Inf
numX <- NA
is.na(numX)
# check NAN
numX <- 0 / 0
# NaN also counts as missing, so is.na() is TRUE here as well
is.na(numX)
is.nan(numX)
# check inf
numX <- 1 / 0
# this will be FALSE: an infinite value is not a missing value
is.na(numX)
is.infinite(numX)
# cleaning NAs from single vector
# example data: vciX and vcsY are not defined anywhere in the notes
vciX <- c(1L, NA, 3L, NA, 5L)
vcsY <- c("a", "b", NA, "d", "e")
# logical vector: TRUE where vciX is missing
vcbBad <- is.na(vciX)
# keep only the non-missing values
vciX[!vcbBad]
# cleaning NAs across several vectors: TRUE only at positions where
# neither vciX nor vcsY is missing
vcbGood <- complete.cases(vciX, vcsY)
# values of vciX at the complete positions
vciX[vcbGood]
# values of vcsY at the complete positions
vcsY[vcbGood]
# Basic Functions with Data Frames
# column names, first rows and last rows
names(dfr)
head(dfr)
tail(dfr)
# length() of a data frame is its number of columns;
# length() of a single column is the number of rows
length(dfr)
length(dfr$foo)
nrow(dfr)
ncol(dfr)
attributes(dfr)
# NOTE(review): if dfr is still the data frame from the subsetting example,
# bar contains an NA, so the aggregates below return NA unless
# na.rm = TRUE is supplied - confirm which dfr is intended here
sum(dfr$bar)
min(dfr$bar)
max(dfr$bar)
median(dfr$bar)
# NOTE: mode() reports the storage mode of the column, not the most
# frequent value
mode(dfr$bar)
sd(dfr$bar)
summary(dfr)
# standard deviation of each column collected in a one-row data frame
# NOTE(review): buf is built from strings in the subsetting example and a
# standard deviation is not meaningful for text - confirm intent
data.frame(foo.sd=sd(dfr$foo),bar.sd=sd(dfr$bar),buf.sd=sd(dfr$buf))
# Ready To Use R Datasets
library(datasets)
# list the available datasets
data()
# use airquality dataset
airquality
head(airquality)
nrow(airquality)
# get dataset of complete rows
# logical vector: TRUE for every row that has no NA in any column
vcbGoodAir <- complete.cases(airquality)
# use the logical vector as a row index to keep only the complete rows
dfrGoodAir <- airquality[vcbGoodAir, ]
# use read.csv
# TRUE / FALSE are spelled out: T and F are ordinary variables that can be
# reassigned
dfrNifty <- read.csv("./data/nifty-data.csv", header = TRUE,
                     stringsAsFactors = FALSE)
head(dfrNifty)
View(dfrNifty)
attributes(dfrNifty)
summary(dfrNifty)
nrow(dfrNifty)
ncol(dfrNifty)
# readLines
# returns a character vector with one element per line of the file
vcsUNProfile <- readLines("./data/un-profile.txt")
head(vcsUNProfile)
length(vcsUNProfile)
# view files
file.show("session-1.r")
file.show("./data/un-profile.txt")
file.show("./data/nifty-data.csv")
# readLines to read URL
conGoogle <- url("http://www.google.com/", "r")
# finally = close(...) releases the connection even when readLines() fails
vcsGoogle <- tryCatch(readLines(conGoogle), finally = close(conGoogle))
length(vcsGoogle)
head(vcsGoogle, 10)
vcsGoogle[7]
#
#
#
#
#
#
#
#
#
Control Structures
Control Structures if, else (Testing a condition)
Control Structures for (execute a loop fixed number of times)
Control Structures while (execute a loop while a condition is true)
Control Structures repeat (execute an infinite loop)
Control Structures break (break the execution of the loop)
Control Structures next (skip an iteration of a loop)
Control Structures return (exit a function)
User Defined Function
# simple function without return statement

# Return the sum of numA and numB.
# No return() is needed: a function returns the value of the last
# expression it evaluates.
addNumbers <- function(numA, numB) {
  numA + numB
}
addNumbers(1, 2)
# simple function with two return statement
# Add numA and numB; numB defaults to 0 when it is not supplied.
# Returns NA when both arguments are zero, otherwise returns their sum.
addNumbers <- function(numA, numB = 0) {
  bothZero <- (numA == 0) && (numB == 0)
  if (bothZero) {
    # first exit: nothing meaningful to add
    return(NA)
  }
  # second exit: the regular result
  return(numA + numB)
}
addNumbers(0)
addNumbers(1)
addNumbers(1, 2)
Session-3 (Writing data / Date-time functions)

# write csv
# row.names = FALSE leaves the row-name column out of the file
write.csv(dfr, "filename.csv", row.names = FALSE)
file.show("filename.csv")
# write lines after reading text file
# relative path, as in the earlier readLines example, instead of a
# machine-specific absolute path
vcsUNProfile <- readLines("./data/un-profile.txt")
# keep only the first 50 characters of every line
vcsUNProfile <- substr(vcsUNProfile, 1, 50)
writeLines(vcsUNProfile, "filename.txt")
file.show("filename.txt")
# write rds (R Data Set)
# every column is generated with 100 values; the original drew only 10
# random numbers for bar (via an assignment embedded in the call) and
# relied on recycling to fill the 100 rows
dfr <- data.frame(foo = 1:100, bar = runif(100, min = 0, max = 10),
                  buf = rnorm(100))
saveRDS(dfr, "filename.rds")
# an .rds file is a serialized R object rather than text, so it is
# checked by reading it back instead of displaying it with file.show()
dfrFromRds <- readRDS("filename.rds")
head(dfrFromRds)
# date yyyy-mm-dd hh:mm:ss
# as.Date() keeps only the calendar date; the time-of-day part of the
# string is dropped
datX <- as.Date("2015-10-01 13:45:32")
class(datX)
# POSIXlt yyyy-mm-dd hh:mm:ss
# date-time held as a list of components (seconds, minutes, hours, ...)
xltX <- as.POSIXlt("2015-10-01 13:45:32")
class(xltX)
# POSIXct yyyy-mm-dd hh:mm:ss
# date-time held as a single number of seconds since 1970-01-01
xctX <- as.POSIXct("2015-10-01 13:45:32")
class(xctX)
# current date-time and the class of the returned object
Sys.time()
class(Sys.time())
# extract date / time using format()
# standard date format
format(Sys.time(), "%c")
# mm/dd/yy format
format(Sys.time(), "%D")
# yyyy-mm-dd - iso 8601 format
format(Sys.time(), "%F")
# day of week (%a abbreviated name, %A full name)
format(Sys.time(), "%a")
format(Sys.time(), "%A")
# day of month
format(Sys.time(), "%d")
# month (%m number, %b abbreviated name, %B full name)
format(Sys.time(), "%m")
format(Sys.time(), "%b")
format(Sys.time(), "%B")
# year (%y two digits, %Y four digits)
format(Sys.time(), "%y")
format(Sys.time(), "%Y")
# full date
format(Sys.time(), "%a %d-%b-%Y")
format(Sys.time(), "%a %d-%m-%Y")
# extract time using format()
# time in the locale-specific format
format(Sys.time(), "%X")
# time to second accuracy
format(Sys.time(), "%H:%M:%S")
# time to sub-second accuracy (if supported by the OS)
format(Sys.time(), "%H:%M:%OS3")
# locale-specific version of date / time
format(Sys.time(), "%a %b %d %Y %X %Z")
Session-4 (Data manipulation using dplyr)
# one-time installation of the packages used in this session
install.packages("dplyr")
install.packages("tidyr")
install.packages("data.table")
# installing is not enough: the packages must be attached before
# data.table(), filter() and slice() can be called
library(data.table)
library(dplyr)
# relative path, as in the earlier read.csv example, instead of a
# machine-specific absolute path
dfrNifty <- read.csv("./data/nifty-data.csv", header = TRUE,
                     stringsAsFactors = FALSE)
dfrNifty <- data.table(dfrNifty)
# filter
dfrNifty.ACC <- filter(dfrNifty, Symbol == "ACC")
nrow(dfrNifty.ACC)
View(dfrNifty.ACC)
# subset of rows based on AND condition
# NOTE(review): the column name DateDate is kept from the original notes -
# confirm it against the header of the CSV file
dfrNifty.Filt <- filter(dfrNifty,
                        DateDate >= "2014-12-01" & DateDate <= "2014-12-05")
# subset of rows based on OR condition
dfrNifty.Filt <- filter(dfrNifty,
                        DateDate == "2014-12-01" | DateDate == "2014-12-05")
# subset of rows based on "starts-with" condition ... start search string with ^
dfrNifty.Tmp <- slice(dfrNifty, 1:50)
dfrNifty.Filt <- filter(dfrNifty.Tmp, grepl("^TATA", NameOfTheSecurityInNse))
# subset of rows based on "ends-with" condition ... end search string with $
dfrNifty.Filt <- filter(dfrNifty.Tmp, grepl("LTD$", NameOfTheSecurityInNse))
# subset of rows based on "does NOT contain" condition: grepl() finds the
# pattern anywhere in the string and the leading ! inverts the match
# (the trailing + only means "one or more K" and is not required)
dfrNifty.Filt <- filter(dfrNifty.Tmp, !grepl("BANK+", Symbol))
# subset of rows by position or row-range ... last 10
# max() keeps the start index at 1 when the table has fewer than 10 rows
fr <- max(1L, nrow(dfrNifty) - 9L)
to <- nrow(dfrNifty)
dfrNifty.Slcd <- slice(dfrNifty, fr:to)

R Functions

Diunggah oleh

Informasi Dokumen

Judul Asli

Hak Cipta

Format Tersedia

Bagikan dokumen Ini

Bagikan atau Tanam Dokumen

Opsi Berbagi

Apakah menurut Anda dokumen ini bermanfaat?

Apakah konten ini tidak pantas?

Hak Cipta:

Format Tersedia

R Functions

Diunggah oleh

Hak Cipta:

Format Tersedia

Session-1 (Intro & Quick start)

# recycling rule - if two vectors are of unequal length, the shorter

# how matrix is populated using a vector & dim()

m3 <- m1 %*% m2 (Matrices Multiplication - Works)

# get the row with name '1'

# get the rows 1 to 2

# get the all rows except 1 to 2, 8 to 9

x <- runif(10, min=0, max=10)

Session-2 (Reading & Checking data)

# get dataset of complete rows

# simple function without return statement

Session-3 (Writing data / Date-time functions)

# write rds (R Data Set)

# extract date / time using format()

Session-4 (Data manipulation using dplyr)

Anda mungkin juga menyukai