Qualitative and Quantitative Data

 Qualitative Data / Categorical

# Shirt sizes recorded as plain character strings.
shirts <- c("S", "M", "L", "XL", "XXL", "M", "L", "XL", "XXL", "S", "M")
print(shirts)

# Convert the strings into a factor so R treats them as categories.
shirt_sizes <- factor(x = shirts)
print(shirt_sizes)

# str() doesn't list all the levels; it shows the factor as an integer
# vector whose values are the codes of the corresponding levels.
str(shirt_sizes)

# For a factor, summary() reports one count per level.
print(summary(shirt_sizes))

# Second element of the character vector, then of the factor.
print(shirts[2])
print(shirt_sizes[2])

# A plain character vector carries no levels (NULL) ...
print(levels(shirts))
# ... whereas the factor gives the listing of its levels.
print(levels(shirt_sizes))

# Frequency table of the categories.
print(table(shirt_sizes))


# Balloon colours recorded as plain character strings.
baloons <- c("Blue", "Blue", "Red", "Red", "Blue", "Yellow", "Green")
print(baloons)

# The same data as a factor (one level per distinct colour).
baloon_colors <- factor(x = baloons)
print(baloon_colors)

# Compact structure display and per-level counts.
str(baloon_colors)
print(summary(baloon_colors))

# First element of the character vector, then of the factor.
print(baloons[1])
print(baloon_colors[1])

# Listing of the levels, followed by the frequency table.
print(levels(baloon_colors))
print(table(baloon_colors))

Visualizing Qualitative Data

# Sample of shirt sizes, converted to a factor and tabulated.
shirts <- c("S", "M", "L", "XL", "XXL", "M", "M", "L", "L", "XXL", "M")
shirt_sizes <- factor(x = shirts)
shirts_table <- table(shirt_sizes)
print(shirts_table)

# One fill colour per factor level, in level order.
size_colors <- c("blue", "green", "red", "yellow", "Black")

# Bar chart of the counts: plain first, then coloured with a y-axis label.
barplot(height = shirts_table)
barplot(height = shirts_table, col = size_colors, ylab = "Count")

# Logical vector marking the "M" entries, and how many there are.
print(shirt_sizes == "M")
print(sum(shirt_sizes == "M"))

# plot() is R's generic plotting function; given a factor it draws the
# same bar chart as barplot() did above.
plot(shirt_sizes)
plot(shirt_sizes, col = size_colors, ylab = "Count")

# Pie chart of the same counts: plain first, then coloured.
pie(x = shirts_table)
pie(x = shirts_table, col = size_colors)


# Age groups stored as numeric codes 1-4; factor() turns each distinct
# code into a level ("1", "2", "3", "4").
age <- factor(c(2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2, 3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4))

# Counts per code and the current level names.
table(age)

levels(age)

# Replace the codes with readable age brackets. The labels are assigned
# to the levels in order (1, 2, 3, 4). The brackets are contiguous, so
# ages 14 and 35 each fall into exactly one group.
levels(age) <- c("<15", "15-24", "25-34", ">=35")

# Same counts, now shown under the descriptive labels.
table(age)

barplot(table(age))


#Quantitative / Continuous

# Song lengths as a numeric vector.
songs <- c(5.3, 3.6, 5.5, 4.7, 6.7, 4.3, 4.3, 8.9, 5.1, 5.8, 4.4)
print(songs)

# Number of observations.
print(length(songs))

# Largest and smallest value.
print(max(songs))
print(min(songs))

# Sum and product of all values.
print(sum(songs))
print(prod(songs))

# Values in ascending, then descending, order.
print(sort(songs))
print(sort(songs, decreasing = TRUE))

# Ratings on a 1-4 scale, kept as a numeric vector.
ratings <- c(2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2, 3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4)
print(ratings)

# Number of observations.
print(length(ratings))

# Largest and smallest value.
print(max(ratings))
print(min(ratings))

# Sum and product of all values.
print(sum(ratings))
print(prod(ratings))

# Values in ascending, then descending, order.
print(sort(ratings))
print(sort(ratings, decreasing = TRUE))


Visualizing Quantitative Data

# Ratings on a 1-4 scale used for all plots in this section.
ratings <- c(2, 4, 3, 3, 2, 1, 1, 2, 3, 4, 2, 3, 3, 4, 1, 3, 2, 1, 4, 3, 2, 4)

length(ratings)

# Five-number summary plus the mean.
summary(ratings)

# Histogram of counts per bin.
hist(ratings)

# Histogram rescaled to densities (bar areas sum to 1). The argument is
# spelled out in full rather than relying on partial matching of `prob`.
hist(ratings, probability = TRUE, col = "grey")

# Another useful way to visualize quantitative data is a density
# estimate: an empirical estimate of the distribution of the data.
# lines() draws it on top of the histogram that was plotted last.
lines(density(ratings), col = "red")

# Same overlay with an explicit y-axis range. The upper limit is set to
# 0.7 so that the tallest bar fits inside the plotting region.
hist(ratings, probability = TRUE, col = "grey", ylim = c(0, 0.7))

lines(density(ratings), col = "red")

# Box-and-whisker plot.
boxplot(ratings)

# Plotting a numeric vector draws each value against its index.
plot(ratings)

# One-dimensional scatter plots: overlapping points by default, then
# with random jitter, then with equal values stacked.
stripchart(ratings)

stripchart(ratings, method = "jitter")

stripchart(ratings, method = "stack")


Visualizing Stock Quantitative Data

library(dplyr)

# Read the stock data; the path is relative to the working directory.
# NOTE(review): assumes the CSV has a numeric `Price` column -- confirm.
gedata <- read.csv("GEStock.csv")

# Keep only the Price column (still a data frame) and summarise it.
geprice <- select(gedata, Price)

summary(geprice)

# Pull the column out once as a plain vector for the base plotting calls.
price_values <- geprice$Price

# Histogram of counts.
hist(price_values)

# Density-scaled histogram; `probability` is spelled out in full rather
# than relying on partial matching of `prob`.
hist(price_values, probability = TRUE)

hist(price_values, probability = TRUE, col = "blue")

# Explicit y-axis range for the final histogram.
hist(price_values, probability = TRUE, col = "blue", ylim = c(0, 0.025))

# Overlay the density estimate. hist() silently drops missing values,
# while density() stops on them unless na.rm = TRUE, so pass it here to
# keep the two consistent.
lines(density(price_values, na.rm = TRUE), col = "red")


Comments

Popular posts from this blog

Probability: Binomial Distribution

Probability: Normal Distribution

Hypothesis testing