dplyr - Assignment 1
Install dplyr
# Install dplyr only when it is not already available, so re-running the
# script does not reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
The dplyr package offers easy-to-understand functions for working with data tables.
- It offers a set of well-optimised functions.
- It does not add new capabilities to R itself; it makes existing ones easier to use.
- Its functions are very efficient.
- It covers the most frequently used data-processing tasks.
Dataset: https://drive.google.com/file/d/1KVMo0BhkIQkgO_YU7QjVLJ4BLie5VoJK/view?usp=sharing
Code
# select ----
# Load the murders data, inspect it, then pick columns first with base R
# (by position) and then with dplyr::select() (by name).
library(dplyr)
mydata <- read.csv("murders.csv")
mydata
dim(mydata)
str(mydata)
summary(mydata)

# Mean of columns 4 and 6. vapply() works column by column, so the data frame
# is not coerced to a matrix first, and TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
vapply(mydata[c(4, 6)], mean, numeric(1), na.rm = TRUE)

# Base R: columns selected by position.
mydata[c(1, 4, 5)]
# Bare column names cannot be used inside base `[`, e.g.
# mydata[c(state, population, total)] does not work; select() below allows it.
names(mydata)
names(mydata)[c(1, 4, 5)]

# dplyr: a contiguous range of columns, from state through population.
# The result is not called `subset`, to avoid masking base::subset().
selected <- select(mydata, state:population)
selected

# dplyr: individually named columns.
selected <- select(mydata, state, population, total)
selected

# dplyr: everything except the range abb through region.
selected <- select(mydata, -(abb:region))
selected
# filter ----
# Keep only the rows that satisfy one or more conditions, then summarise
# the filtered result.
library(dplyr)
mydata <- read.csv("murders.csv")
mydata
dim(mydata)
str(mydata)
names(mydata)

# Rows with at least 100 in the total column.
high_total <- filter(mydata, total >= 100)
high_total

# Rows with total >= 100 AND population of at least ten million.
# R numeric literals cannot contain thousands separators: writing
# 10,000,000 passes three separate arguments to filter() and fails, so the
# threshold is written as a single number.
high_total <- filter(mydata, total >= 100 & population >= 10000000)
high_total

# Narrow the filtered rows to named columns.
high_total_cols <- select(high_total, state, population, total)
high_total_cols

# Same rows, this time dropping the range abb through region.
high_total_cols <- select(high_total, -(abb:region))
high_total_cols

# Summaries of the filtered data, the column-reduced data, and two of its
# columns individually.
summary(high_total)
summary(high_total_cols)
summary(high_total_cols$population)
summary(high_total_cols$total)
# Arrange ----
# Sort the rows by population (ascending, then descending) and show the
# first and last five rows of a three-column view each time.
library(dplyr)
mydata <- read.csv("murders.csv")
mydata
names(mydata)

# Ascending order of population.
by_pop_asc <- mydata %>% arrange(population)
by_pop_asc
asc_view <- by_pop_asc %>% select(state, population, total)
asc_view
asc_view %>% head(5)
asc_view %>% tail(5)

# Descending order of population.
by_pop_desc <- mydata %>% arrange(desc(population))
by_pop_desc
desc_view <- by_pop_desc %>% select(state, population, total)
desc_view
desc_view %>% head(5)
desc_view %>% tail(5)
# rename ----
# Give columns new names using rename(new_name = old_name).
library(dplyr)
mydata <- read.csv("murders.csv")
mydata
dim(mydata)
str(mydata)
names(mydata)

# abb -> abbreviation, total -> homicide (spelling of the new names corrected).
mydata1 <- rename(mydata, abbreviation = abb, homicide = total)
mydata1
# mutate data frame ----
# Derive a per-capita ratio column with mutate() and transmute().
library(dplyr)
mydata <- read.csv("murders.csv")
names(mydata)

# NOTE(review): this section divides `murders` by population, while the last
# transmute() below and other sections use `total`. Confirm that murders.csv
# really has a `murders` column; if not, `total` is presumably intended.

# mutate() adds `ratio` alongside the existing columns.
mydata1 <- mutate(mydata, ratio = murders / population)
names(mydata1)
mydata1

# Named `with_ratio_cols` rather than `subset` to avoid masking base::subset().
with_ratio_cols <- select(mydata1, state, population, murders, ratio)
with_ratio_cols

# transmute() keeps only the columns it is given: here just `ratio`.
mydata1 <- transmute(mydata, ratio = murders / population)
names(mydata1)
mydata1

# transmute() again, carrying `state` through explicitly next to `ratio`.
mydata1 <- transmute(mydata, state = state, ratio = total / population)
names(mydata1)
# Print the transmuted result (the original printed the untouched `mydata`).
mydata1
# Generate Summary Statistics ----
# Group the rows by region and add up the murders column within each group.
library(dplyr)
mydata <- read.csv("murders.csv")
mydata
dim(mydata)
str(mydata)
names(mydata)

by_region <- mydata %>% group_by(region)
by_region %>% summarize(sum(murders))
# Pipeline Operator ----
# Chain dplyr verbs with %>% so each step feeds the next.
library(dplyr)
mydata <- read.csv("murders.csv")
names(mydata)

# Sum of total per region.
mydata %>%
  group_by(region) %>%
  summarize(sum(total))

# The ratio column is computed once and reused by the views below.
with_ratio <- mydata %>% mutate(ratio = total / population)

# Ratio shown next to the columns it is derived from.
with_ratio %>% select(state, population, total, ratio)

# Rows ordered by ascending ratio: all of them, then only the first five.
ranked <- with_ratio %>%
  arrange(ratio) %>%
  select(state, total, ratio)
ranked
ranked %>% head(5)
Comments
Post a Comment