# Page 1 of 3

# Import data from an Excel workbook ----

library(gdata)  # supplies read.xls()
Electronic <- read.xls("Electronic.xls")
View(Electronic)

# Print the current working directory; the relative path "Electronic.xls"
# above is resolved against it.
getwd()

############## Lesson 5 Apply function ############################

# List the objects in the workspace. HSB is not created in this script, so it
# is expected to be loaded already.
ls()
View(HSB)

# Basic summaries of the math score column.
mean(HSB[["math"]])
median(HSB[["math"]])
quantile(HSB[["math"]])

# Derive columns with transform() ----

# Add 1 to every math score. Because the target is named `math`, this
# replaces the existing column rather than adding a new one.
Math1 <- transform(HSB, math = math + 1)
View(Math1)

# Conditional column: scores above 50 are kept, all others get +1.
Math1 <- transform(HSB, MathUpdated = ifelse(math > 50, math, math + 1))
View(Math1)

# Conditional column: scores above 50 are kept, all others become 50.
Math1 <- transform(HSB, MathUpdated = ifelse(math > 50, math, 50))
View(Math1)

# Nested ifelse(): "H" above 70, "M" above 40 (up to 70), "L" otherwise.
Math1 <- transform(
  HSB,
  Math_Level = ifelse(math > 70, "H", ifelse(math > 40, "M", "L"))
)
View(Math1)

# Smallest and largest math score.
min(HSB$math)
max(HSB$math)

# Per-student average of math and science, stored as a new column.
MathAndScienceAverage <- transform(HSB, Math_scince = (math + science) / 2)
View(MathAndScienceAverage)

# Mean of the math column: computed directly ...
mean(HSB$math)
# ... or with apply() over the columns (MARGIN = 2) of a one-column matrix.
apply(cbind(HSB$math), MARGIN = 2, FUN = mean)

# Per-student average again, first with transform() ...
MathAndScienceAverage <- transform(HSB, Math_scince = (math + science) / 2)

# ... then with apply() over the rows (MARGIN = 1) of a two-column matrix.
Average_M_S <- apply(cbind(HSB$math, HSB$science), MARGIN = 1, FUN = mean)
Average_M_S
View(Average_M_S)
class(Average_M_S)

# Attach the row-wise averages to the data frame as an extra column.
HSB1 <- cbind(HSB, Average_M_S)
View(HSB1)

# Column-wise ("vertical") means of math and science.
apply(cbind(HSB$math, HSB$science), MARGIN = 2, FUN = mean)

## lapply(): the result is a list

# Column means one at a time ...
mean(HSB$math)
mean(HSB$science)
mean(HSB$read)
# ... or for every column of HSB in one call.
lapply(HSB, FUN = mean)

# Means of columns 7 to 11 only.
MeanOfAllSubjects <- lapply(HSB[, 7:11], FUN = mean)
# Counter-example: do not hand a cbind() result to lapply(). cbind() builds a
# matrix, and lapply() then works element by element instead of per column.
lapply(cbind(HSB$read, HSB$write), FUN = mean)

class(MeanOfAllSubjects)

# Two ways to pull out the read/write columns: cbind() of the two vectors ...
NewDataFrame <- cbind(HSB$read, HSB$write)
View(NewDataFrame)
# ... or selecting columns 7 and 8 by position.
NewDataFrame <- HSB[, 7:8]
View(NewDataFrame)

# sapply(): like lapply(), but the result is simplified to a vector

# Sum of every column of HSB.
SumThroughSapply <- sapply(HSB, FUN = sum)
class(SumThroughSapply)
is.vector(SumThroughSapply)

## apply: column-wise and row-wise functions. Define each variable. Class of
## the object is vector.
## lapply: performs the function on all variables by default. Column-wise
## function. Class is list.
## sapply: similar to lapply, with the only difference that the class is a
## numeric vector.
# Confirm that HSB is a data frame.
class(HSB)
is.data.frame(HSB)

# tapply(): apply a function within groups
# Mean math score for each value of `female`.
tapply(X = HSB$math, INDEX = HSB$female, FUN = mean)

# Restrict to the rows where female == 1.
library(dplyr)  # filter() comes from dplyr, which is covered later on
HSBFemaleData <- filter(HSB, female == 1)
View(HSBFemaleData)

# Same grouped mean, computed on the filtered rows only.
tapply(X = HSBFemaleData$math, INDEX = HSBFemaleData$female, FUN = mean)

### More complex scenario: grouping by two variables

# Mean math score for every female x ses combination. INDEX is a data frame
# (a list of columns), so each column acts as its own grouping factor.
tapply(HSB$math, HSB[, c("female", "ses")], mean)

class(tapply(HSB$math, HSB[, c("female", "ses")], mean))

# Counter-example: this does NOT work. cbind() returns a matrix rather than a
# list, so tapply() treats it as a single grouping vector whose length does
# not match HSB$math and raises an error. The call is wrapped in tryCatch()
# so the failure is reported without aborting the rest of the script when it
# is run with source() or Rscript.
tryCatch(
  class(tapply(HSB$math, cbind(HSB$female, HSB$ses), mean)),
  error = function(e) {
    message("tapply() with a cbind() index failed: ", conditionMessage(e))
  }
)

### dplyr package

library(dplyr)

# Selecting a few variables ----
View(mtcars)

# By position: columns 1 and 10 ...
mtcarsWithGearMilage <- mtcars[, c(1, 10)]
View(mtcarsWithGearMilage)
# ... or by name with select() (columns come back in the order listed here).
mtcarsWithGearMilage <- select(mtcars, gear, mpg)
View(mtcarsWithGearMilage)

# Selection helpers: pick columns by name prefix ...
View(iris)
irisSubset <- select(iris, starts_with("Petal"))
View(irisSubset)

# ... or by name suffix.
irisSubset <- select(iris, ends_with("Width"))
View(irisSubset)

# Filtering rows ----
View(mtcars)

# Single condition.
mtcarsSubset <- filter(mtcars, mpg > 20)
View(mtcarsSubset)

# AND: every condition must hold.
mtcarsSubset <- filter(mtcars, mpg > 20 & hp > 100 & gear > 3)
View(mtcarsSubset)

# OR: at least one condition must hold.
mtcarsSubset <- filter(mtcars, mpg > 20 | hp > 100)
View(mtcarsSubset)

# Sorting rows ----

# Ascending by hp.
mtcarsSubset <- arrange(mtcars, hp)
View(mtcarsSubset)

# Descending by hp.
mtcarsSubset <- arrange(mtcars, desc(hp))
View(mtcarsSubset)

# Base-R logical subsetting: rows of mtcars with mpg > 20 and hp > 100.
# The column name is written out in full; the previous `mtcars$mp` only
# resolved to `mpg` through `$` partial matching, which silently breaks if
# another column starting with "mp" is ever added.
MTSUBSET <- mtcars[mtcars$mpg > 20 & mtcars$hp > 100, ]

# View() opens a GUI data viewer, so only call it in an interactive session;
# this lets the script run unattended (e.g. via Rscript).
if (interactive()) {
  View(MTSUBSET)
}

## transform() vs mutate()

View(HSB)
# Within one mutate() call, the second column (Maths_Science) is built from
# the first one (MathsUPdated) created just before it.
HSB1 <- mutate(
  HSB,
  MathsUPdated = math + 5,
  Maths_Science = MathsUPdated + science
)
View(HSB1)

# You may also like