# Qualitative and Quantitative Data ----

# Qualitative / categorical data ----

# Shirt sizes held as a plain character vector.
shirts <- c("S", "M", "L", "XL", "XXL", "M", "L", "XL", "XXL", "S", "M")
shirts

# factor() converts the strings into a categorical variable; by default the
# levels are the sorted unique values.
shirt_sizes <- factor(shirts)
shirt_sizes

# str() does not list every level: it shows the factor as an integer vector
# whose values are the codes of the corresponding levels.
str(shirt_sizes)

# For a factor, summary() reports the number of observations per level.
summary(shirt_sizes)

# Same element, once as a string and once as a factor value.
shirts[2]
shirt_sizes[2]

# A character vector has no levels attribute (NULL); the factor lists them.
levels(shirts)
levels(shirt_sizes)

# Frequency table of the sizes.
table(shirt_sizes)

# Second example: balloon colours, following the same steps.
baloons <- c("Blue", "Blue", "Red", "Red", "Blue", "Yellow", "Green")
baloons

baloon_colors <- factor(baloons)
baloon_colors

str(baloon_colors)
summary(baloon_colors)

baloons[1]
baloon_colors[1]

levels(baloon_colors)
table(baloon_colors)
# Visualizing Qualitative Data ----

shirts <- c("S", "M", "L", "XL", "XXL", "M", "M", "L", "L", "XXL", "M")
shirt_sizes <- factor(shirts)

# Count of observations per size; this table drives the bar and pie charts.
shirts_table <- table(shirt_sizes)
shirts_table

# One fill colour per factor level, reused by every coloured chart below.
size_colors <- c("blue", "green", "red", "yellow", "Black")

# Bar chart of the counts, first with defaults, then coloured and labelled.
barplot(shirts_table)
barplot(shirts_table, col = size_colors, ylab = "Count")

# Logical mask of the medium shirts, and how many there are.
shirt_sizes == "M"
sum(shirt_sizes == "M")

# plot() is R's generic plotting function; given a factor it draws the same
# bar chart as barplot() on the frequency table.
plot(shirt_sizes)
plot(shirt_sizes, col = size_colors, ylab = "Count")

# Pie charts of the same counts.
pie(shirts_table)
pie(shirts_table, col = size_colors)

# Age groups recorded as numeric codes 1-4 and stored as a factor.
age <- factor(c(2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2, 3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4))
table(age)
levels(age)

# Replace the numeric codes with readable age-band labels (assigned in level
# order, i.e. 1, 2, 3, 4).
# NOTE(review): the labels leave ages 14 and 35 uncovered ("<14" vs "15-24",
# "25-34" vs ">35") - confirm the intended band boundaries.
levels(age) <- c("<14", "15-24", "25-34", ">35")
table(age)
barplot(table(age))
# Quantitative / continuous data ----

# Song lengths: a numeric vector of continuous measurements.
songs <- c(
  5.3, 3.6, 5.5, 4.7, 6.7, 4.3,
  4.3, 8.9, 5.1, 5.8, 4.4
)
songs

# Basic numeric summaries: size, extremes, total and product.
length(songs)
max(songs)
min(songs)
sum(songs)
prod(songs)

# Ascending order, then descending order.
sort(songs)
rev(sort(songs))

# Ratings on a 1-4 scale, summarised the same way.
ratings <- c(
  2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2,
  3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4
)
ratings

length(ratings)
max(ratings)
min(ratings)
sum(ratings)
prod(ratings)

sort(ratings)
rev(sort(ratings))
# Visualizing Quantitative Data ----

ratings <- c(2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2, 3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4)
length(ratings)
summary(ratings)

# Frequency histogram (counts per bin).
hist(ratings)

# Histogram on the density scale. The argument is spelled out as
# `probability` rather than relying on partial matching of `prob`.
hist(ratings, probability = TRUE, col = "grey")

# Another useful way to visualize quantitative data is a density estimate,
# an empirical estimate of the distribution. lines() draws it on top of the
# histogram that is currently displayed.
lines(density(ratings), col = "red")

# The same histogram and overlay again.
hist(ratings, probability = TRUE, col = "grey")
lines(density(ratings), col = "red")

# The same plot with the y-axis fixed to the range 0 to 0.6.
hist(ratings, probability = TRUE, col = "grey", ylim = c(0, 0.6))
lines(density(ratings), col = "red")

# Other one-dimensional views of the same data.
boxplot(ratings)
plot(ratings)

# Strip charts: default placement, then jittered and stacked points.
stripchart(ratings)
stripchart(ratings, method = "jitter")
stripchart(ratings, method = "stack")
# Visualizing Stock Quantitative Data ----

library(dplyr)

# Load the stock data from the working directory and keep only the Price
# column (still a one-column data frame after select()).
gedata <- read.csv("GEStock.csv")
geprice <- select(gedata, Price)
summary(geprice)

# geprice$Price is already a plain vector, so no as.vector() is needed.
# Pulling it out once also lets the plots carry readable titles instead of
# ones generated from the call expression.
ge_price_values <- geprice$Price
ge_hist_title <- "Histogram of GE stock price"

# Frequency histogram, then density-scaled versions with progressively more
# styling. `probability` is spelled out instead of the partial match `prob`.
hist(ge_price_values, main = ge_hist_title, xlab = "Price")
hist(ge_price_values, probability = TRUE, main = ge_hist_title, xlab = "Price")
hist(ge_price_values, probability = TRUE, col = "blue",
     main = ge_hist_title, xlab = "Price")
hist(ge_price_values, probability = TRUE, col = "blue", ylim = c(0, 0.025),
     main = ge_hist_title, xlab = "Price")

# Overlay the kernel density estimate on the last histogram.
lines(density(ge_price_values), col = "red")
# [Web page footer, not part of the script] Comments
# [Web page footer, not part of the script] Post a Comment