R Visualizations- Part 2
R Visualizations – ggplot2 (PART-2)
- Distribution
Studying how and where data points are distributed is very important when working with large amounts of data.
Histogram
Histogram plot – continuous variable
# Histograms of engine displacement (`displ`) from the `mpg` data set,
# with the bars filled by vehicle class.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(displ)) +
  scale_fill_brewer(palette = "Spectral")

# Variant 1: the bin width is fixed at 0.1.
graph +
  geom_histogram(aes(fill = class), binwidth = 0.1, col = "black", size = 0.1) +
  labs(
    title = "Histogram with Auto Binning",
    subtitle = "Engine Displacement across Vehicle Classes"
  )

# Variant 2: the number of bins is fixed at 5.
graph +
  geom_histogram(aes(fill = class), bins = 5, col = "black", size = 0.1) +
  labs(
    title = "Histogram with Fixed Bins",
    subtitle = "Engine Displacement across Vehicle Classes"
  )
Histogram plot — categorical variable
# Bar chart of a categorical variable: one bar per manufacturer in `mpg`,
# stacked and coloured by vehicle class.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(manufacturer))

graph +
  geom_bar(aes(fill = class), width = 0.5) +
  # Tilt the manufacturer names so they do not overlap.
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Histogram on Categorical Variable",
    subtitle = "Manufacturer across Vehicle Classes"
  )
Density plot
# Density of city mileage (`cty`) in `mpg`, one semi-transparent curve per
# number of cylinders.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(cty))

graph +
  geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  labs(
    title = "Density plot",
    subtitle = "City Mileage Grouped by Number of cylinders",
    caption = "Source: mpg",
    x = "City Mileage",
    fill = "# Cylinders"
  )
Box Plot
A box plot is an excellent tool for studying how data is distributed. It shows the basic summary information of a variable in a single plot.
# Box plot of city mileage for each vehicle class in `mpg`.
library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(mpg, aes(class, cty))

graph +
  # varwidth = TRUE is spelled out because `T` is an ordinary, reassignable
  # variable.
  geom_boxplot(varwidth = TRUE, fill = "plum") +
  labs(
    title = "Box plot",
    subtitle = "City Mileage grouped by Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
# Box plot of city mileage per vehicle class, split further by number of
# cylinders through the fill colour.
library(ggthemes)

graph <- ggplot(mpg, aes(class, cty))

graph +
  geom_boxplot(aes(fill = factor(cyl))) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Box plot",
    subtitle = "City Mileage grouped by Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
Dot + Box Plot
# Box plot of city mileage per manufacturer with the individual observations
# overlaid as a centred dot plot.
library(ggplot2)
theme_set(theme_bw())

graph <- ggplot(mpg, aes(manufacturer, cty))

graph +
  geom_boxplot() +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    dotsize = 0.5,
    fill = "red"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  # The x aesthetic is `manufacturer`, so the labels name it as such.
  labs(
    title = "Box plot + Dot plot",
    subtitle = "City Mileage vs Manufacturer: Each dot represents 1 row in source data",
    caption = "Source: mpg",
    x = "Manufacturer",
    y = "City Mileage"
  )
Tufte Boxplot
# Tufte-style (minimal ink) box plot of city mileage per manufacturer.
library(ggthemes)
library(ggplot2)
theme_set(theme_tufte())

graph <- ggplot(mpg, aes(manufacturer, cty))

graph +
  geom_tufteboxplot() +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  # The x aesthetic is `manufacturer`, so the labels name it as such.
  labs(
    title = "Tufte Styled Boxplot",
    subtitle = "City Mileage grouped by Manufacturer",
    caption = "Source: mpg",
    x = "Manufacturer",
    y = "City Mileage"
  )
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
Violin Plot
# Violin plot of city mileage for each vehicle class in `mpg`.
library(ggplot2)
theme_set(theme_bw())

graph <- ggplot(mpg, aes(class, cty))

graph +
  geom_violin() +
  labs(
    title = "Violin plot",
    subtitle = "City Mileage vs Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
Population Pyramid
# Population-pyramid style chart: horizontal bars of `Users` per `Stage`,
# coloured by `Gender`, read from a remote CSV.
library(ggplot2)
library(ggthemes)

# Print large axis values in fixed rather than scientific notation.
options(scipen = 999)

email_campaign <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv"
)

# Axis breaks run from -15M to +15M; the labels show the magnitude only, so
# both sides of zero read as positive counts ("15m" ... "0m" ... "15m").
brks <- seq(-15000000, 15000000, 5000000)
lbls <- paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")

ggplot(email_campaign, aes(x = Stage, y = Users, fill = Gender)) +
  geom_bar(stat = "identity", width = 0.6) +
  scale_y_continuous(breaks = brks, labels = lbls) +
  coord_flip() +
  labs(title = "Email Campaign ") +
  theme_tufte() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.ticks = element_blank()
  ) +
  scale_fill_brewer(palette = "Dark2")
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
Violin Plot
- Composition
Waffle Chart
# Waffle chart: a 10 x 10 grid of tiles in which each vehicle class of `mpg`
# gets a number of tiles proportional to its share of the rows.
v <- mpg$class
nr <- 10
dfrm <- expand.grid(y = 1:nr, x = 1:nr)

# Scale the class counts so they add up to the number of tiles (nr * nr).
# Named `class_counts` rather than `table` to avoid shadowing base::table().
class_counts <- round(table(v) * ((nr * nr) / length(v)))
class_counts

# Rounding can make the total drift away from nr * nr, in which case the
# categories cannot be laid onto the grid; fail with a clear message instead.
if (sum(class_counts) != nr * nr) {
  stop(
    "Rounded category counts sum to ", sum(class_counts),
    " but the grid has ", nr * nr, " tiles; adjust the counts.",
    call. = FALSE
  )
}
dfrm$category <- factor(rep(names(class_counts), class_counts))

ggplot(dfrm, aes(x = x, y = y, fill = category)) +
  geom_tile(color = "black", size = 0.5) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0), trans = "reverse") +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Waffle Chart",
    subtitle = "'Class' of vehicles",
    caption = "Source: mpg"
  ) +
  theme(
    panel.border = element_rect(size = 2),
    plot.title = element_text(size = rel(1.2)),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.title = element_blank(),
    legend.position = "right"
  )
Pie Chart
# Pie charts of vehicle class in `mpg`. A pie chart in ggplot2 is a single
# stacked bar drawn in polar coordinates.
library(ggplot2)
theme_set(theme_classic())

# Version 1: start from a pre-computed frequency table.
# Named `class_freq` rather than `data` to avoid shadowing utils::data().
class_freq <- as.data.frame(table(mpg$class))
colnames(class_freq) <- c("class", "freq")

piechart <- ggplot(class_freq, aes(x = "", y = freq, fill = factor(class))) +
  geom_bar(width = 1, stat = "identity") +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    title = "Pie Chart of class",
    caption = "Source: mpg"
  )

piechart + coord_polar(theta = "y", start = 0)

# Version 2: let geom_bar() count the rows of the raw data itself.
piechart <- ggplot(mpg, aes(x = "", fill = factor(class))) +
  geom_bar(width = 1) +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    title = "Pie Chart of class",
    caption = "Source: mpg"
  )

piechart + coord_polar(theta = "y", start = 0)
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
Violin Plot
Treemap
# Treemap of programming languages read from a remote CSV: tile area comes
# from `value`, tiles are coloured and grouped by `parent` and labelled by
# `id`.
library(ggplot2)
library(treemapify)

langs <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/proglanguages.csv"
)

# NOTE(review): the fill/label/group arguments and ggplotify() look like an
# older treemapify interface; confirm they exist in the installed version
# (newer releases appear to provide geom_treemap() instead).
treeMap <- treemapify(
  langs,
  area = "value",
  fill = "parent",
  label = "id",
  group = "parent"
)

treePlot <- ggplotify(treeMap) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_brewer(palette = "Dark2")

print(treePlot)
Bar Chart
# Bar chart from a pre-computed frequency table of manufacturers in `mpg`.
# as.data.frame.table() yields the columns `Var1` (manufacturer) and `Freq`.
freq <- table(mpg$manufacturer)
data <- as.data.frame.table(freq)
head(data)

library(ggplot2)
theme_set(theme_classic())

graph <- ggplot(data, aes(Var1, Freq))

graph +
  # stat = "identity": bar heights are taken from `Freq` as given.
  geom_bar(stat = "identity", width = 0.5, fill = "tomato2") +
  labs(
    title = "Bar Chart",
    subtitle = "Manufacturer of vehicles",
    caption = "Source: Frequency of Manufacturers from 'mpg' dataset"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
# Bar chart built straight from the raw `mpg` rows: geom_bar() counts the
# rows per manufacturer and stacks them by vehicle class.
graph <- ggplot(mpg, aes(manufacturer))

graph +
  geom_bar(aes(fill = class), width = 0.5) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Categorywise Bar Chart",
    subtitle = "Manufacturer of vehicles",
    caption = "Source: Manufacturers from 'mpg' dataset"
  )
- Change
# Time-series plot drawn directly from a `ts` object: ggfortify supplies the
# autoplot() method used here for `AirPassengers`.
library(ggplot2)
library(ggfortify)
theme_set(theme_classic())

autoplot(AirPassengers) +
  labs(title = "AirPassengers") +
  theme(plot.title = element_text(hjust = 0.5))
library(ggplot2)
theme_set(theme_classic())
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
# Time-series line chart with the default date axis.
# `returns_perc` is not a column of ggplot2's `economics` data, so it is
# derived here as the month-over-month relative change of `psavert`, with 0
# for the first row.
# NOTE(review): confirm this matches the definition used earlier in the
# tutorial series.
economics$returns_perc <- c(
  0,
  diff(economics$psavert) / economics$psavert[-nrow(economics)]
)

ggplot(economics, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(
    title = "Time Series Chart",
    subtitle = "Returns Percentage from 'Economics' Dataset",
    caption = "Source: Economics",
    y = "Returns %"
  )
# Monthly time series: the first 24 rows of `economics`, with one
# "Mon YYYY" axis label per observation.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

economic <- economics[1:24, ]

# `returns_perc` is not a column of ggplot2's `economics` data, so it is
# derived here as the month-over-month relative change of `psavert`, with 0
# for the first row.
# NOTE(review): confirm this matches the definition used earlier in the
# tutorial series.
economic$returns_perc <- c(
  0,
  diff(economic$psavert) / economic$psavert[-nrow(economic)]
)

# One break and one label per row.
lbels <- paste0(
  month.abb[month(economic$date)], " ", lubridate::year(economic$date)
)
breks <- economic$date

ggplot(economic, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(
    title = "Monthly Time Series",
    subtitle = "Returns Percentage from Economics Dataset",
    caption = "Source: Economics",
    y = "Returns %"
  ) +
  scale_x_date(labels = lbels, breaks = breks) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor = element_blank()
  )
# Yearly time series: the first 90 rows of `economics`, with an axis label
# on every 12th observation showing its year.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

economic <- economics[1:90, ]

# `returns_perc` is not a column of ggplot2's `economics` data, so it is
# derived here as the month-over-month relative change of `psavert`, with 0
# for the first row.
# NOTE(review): confirm this matches the definition used earlier in the
# tutorial series.
economic$returns_perc <- c(
  0,
  diff(economic$psavert) / economic$psavert[-nrow(economic)]
)

# Every 12th date becomes a break, labelled with its year.
breks <- economic$date[seq(1, length(economic$date), 12)]
lbels <- lubridate::year(breks)

ggplot(economic, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(
    title = "Yearly Time Series",
    subtitle = "Returns Percentage from Economics Dataset",
    caption = "Source: Economics",
    y = "Returns %"
  ) +
  scale_x_date(labels = lbels, breaks = breks) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor = element_blank()
  )
# Two series on one chart from long-format data: the `variable` column of
# `economics_long` selects the series and drives the line colour.
data(economics_long, package = "ggplot2")
head(economics_long)

library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics_long[economics_long$variable %in% c("psavert", "uempmed"), ]
# Subset `daf` itself; the bare name `df` would refer to the stats::df()
# function, which cannot be subset.
daf <- daf[lubridate::year(daf$date) %in% 1967:1981, ]

# Every 12th date becomes a break, labelled with its year.
breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_line(aes(y = value, col = variable)) +
  labs(
    title = "Time Series of Returns Percentage",
    subtitle = "Drawn from Long Data format",
    caption = "Source: Economics",
    y = "Returns %",
    color = NULL
  ) +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_color_manual(
    labels = c("psavert", "uempmed"),
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8),
    panel.grid.minor = element_blank()
  )
# Two series on one chart from wide-format data: each column gets its own
# geom_line() layer, and a constant string in aes() names it in the legend.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics[, c("date", "psavert", "uempmed")]
daf <- daf[lubridate::year(daf$date) %in% 1967:1981, ]

# Every 12th date becomes a break, labelled with its year.
breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_line(aes(y = psavert, col = "psavert")) +
  geom_line(aes(y = uempmed, col = "uempmed")) +
  labs(
    title = "Time Series of Returns Percentage",
    subtitle = "Drawn From Wide Data format",
    caption = "Source: Economics",
    y = "Returns %"
  ) +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_color_manual(
    name = "",
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) +
  theme(panel.grid.minor = element_blank())
Stacked Area Chart
# Stacked area chart from wide-format data. The stacking is done by hand:
# the first layer draws psavert + uempmed and the second layer draws uempmed
# on top of it, so the visible band of the first layer is psavert.
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

daf <- economics[, c("date", "psavert", "uempmed")]
# Subset `daf` itself; the bare name `df` would refer to the stats::df()
# function, which cannot be subset.
daf <- daf[lubridate::year(daf$date) %in% 1967:1981, ]

# Every 12th date becomes a break, labelled with its year.
breks <- daf$date[seq(1, length(daf$date), 12)]
lbels <- lubridate::year(breks)

ggplot(daf, aes(x = date)) +
  geom_area(aes(y = psavert + uempmed, fill = "psavert")) +
  geom_area(aes(y = uempmed, fill = "uempmed")) +
  labs(
    title = "Area Chart of Returns Percentage",
    subtitle = "From Wide Data format",
    caption = "Source: Economics",
    y = "Returns %"
  ) +
  scale_x_date(labels = lbels, breaks = breks) +
  scale_fill_manual(
    name = "",
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) +
  theme(panel.grid.minor = element_blank())
Calendar Heatmap
# Calendar heatmap: one tile per day, laid out as week-of-month by weekday,
# faceted by year and month, and coloured by `VIX.Close`.
library(ggplot2)
library(plyr)
library(scales)
library(zoo)

daf <- read.csv(
  "https://raw.githubusercontent.com/selva86/datasets/master/yahoo.csv"
)
daf$date <- as.Date(daf$date)
daf <- daf[daf$year >= 2012, ]

# Year-month key, used to number the weeks within each month.
daf$yearmonth <- as.yearmon(daf$date)
daf$yearmonthf <- factor(daf$yearmonth)

# Within each year-month, renumber `week` so the first week present is 1.
daf <- ddply(daf, .(yearmonthf), transform, monthweek = 1 + week - min(week))
daf <- daf[, c(
  "year", "yearmonthf", "monthf", "week", "monthweek", "weekdayf", "VIX.Close"
)]
head(daf)

ggplot(daf, aes(monthweek, weekdayf, fill = VIX.Close)) +
  geom_tile(colour = "white") +
  facet_grid(year ~ monthf) +
  scale_fill_gradient(low = "red", high = "green") +
  labs(
    x = "Week of Month",
    y = "",
    title = "Time-Series Calendar Heatmap",
    subtitle = "Yahoo Closing Price",
    fill = "Close"
  )
Slope Chart
# Slope chart setup: load the cancer survival-rate data from a remote CSV.
library(dplyr)
theme_set(theme_classic())

source_daf <- read.csv(
  "https://raw.githubusercontent.com/jkeirstead/r-slopegraph/master/cancer_survival_rates.csv"
)
#' Prepare long-format data for a slope graph
#'
#' Picks the x, y and group columns, fills in missing x/group combinations
#' with y = 0, orders the groups by their value at the first x position, and
#' computes a cumulative vertical shift so that neighbouring lines are at
#' least a minimum distance apart.
#'
#' Defined as `tufte_sort` (the name the script calls) instead of `sort`,
#' which would mask base::sort().
#'
#' @param daf A data frame in long format.
#' @param x,y,group Names (strings) of the columns holding the x position,
#'   the value and the group label.
#' @param method Unused; kept so that existing calls keep working.
#' @param min.space Minimum spacing between neighbouring lines, as a fraction
#'   of the overall range of the values.
#' @return A long-format data frame with the columns `group`, `yshift`, `x`,
#'   `y` and `ypos`, where `ypos` is the shifted position to plot.
tufte_sort <- function(daf, x = "year", y = "value", group = "group",
                       method = "tufte", min.space = 0.05) {
  # Keep only the three columns of interest, under fixed names.
  ids <- match(c(x, y, group), names(daf))
  daf <- daf[, ids]
  names(daf) <- c("x", "y", "group")

  # Complete the x-by-group grid; combinations without data get y = 0.
  grid <- expand.grid(x = unique(daf$x), group = unique(daf$group))
  full <- merge(daf, grid, all.y = TRUE)
  full$y[is.na(full$y)] <- 0

  # One row per group, one column per x value, ordered by the first x column.
  wide <- reshape2::dcast(full, group ~ x, value.var = "y")
  wide <- wide[order(wide[, 2]), ]

  # Turn the relative spacing into an absolute distance on the y scale.
  min.space <- min.space * diff(range(wide[, -1]))

  # For each row, how much it has to move up to clear the row below it.
  shift <- numeric(nrow(wide))
  for (i in seq_len(nrow(wide))[-1]) {
    pair <- as.matrix(wide[(i - 1):i, -1])
    d_min <- min(diff(pair))
    shift[i] <- if (d_min < min.space) min.space - d_min else 0
  }
  wide <- cbind(wide, yshift = cumsum(shift))

  # Back to long format; the plotted position is the value plus its shift.
  scale <- 1
  long <- reshape2::melt(
    wide,
    id.vars = c("group", "yshift"),
    variable.name = "x",
    value.name = "y"
  )
  transform(long, ypos = y + scale * yshift)
}
#' Draw a slope graph from prepared data
#'
#' Defined as `plot_slopegraph` (the name the script calls) instead of
#' `plot`, which would mask the base plot() generic.
#'
#' @param daf A data frame with the columns `x`, `y`, `ypos` and `group`.
#' @return A ggplot object: one grey line per group, with the `y` value
#'   printed at each point and the group names used as y-axis labels.
plot_slopegraph <- function(daf) {
  # The axis labels and their positions come from the rows at the first x
  # value.
  first_x <- subset(daf, x == head(x, 1))
  ylabs <- first_x$group
  yvals <- first_x$ypos
  font_size <- 3

  ggplot(daf, aes(x = x, y = ypos)) +
    geom_line(aes(group = group), colour = "grey80") +
    # White discs blank out the line behind each value label.
    geom_point(colour = "white", size = 8) +
    geom_text(aes(label = y), size = font_size, family = "American Typewriter") +
    scale_y_continuous(name = "", breaks = yvals, labels = ylabs)
}
# Prepare the survival-rate data for plotting.
daf <- tufte_sort(
  source_daf,
  x = "year",
  y = "value",
  group = "group",
  method = "tufte",
  min.space = 0.05
)
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
# Relabel the x positions as "N years", round the values that are printed
# on the chart, then draw and style the slope graph.
daf <- transform(
  daf,
  x = factor(
    x,
    levels = c(5, 10, 15, 20),
    labels = c("5 years", "10 years", "15 years", "20 years")
  ),
  y = round(y)
)

plot_slopegraph(daf) +
  labs(title = "Estimates of % survival rates") +
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(
      hjust = 0.5, family = "American Typewriter", face = "bold"
    ),
    axis.text = element_text(family = "American Typewriter", face = "bold")
  )
Seasonal Plot
# Seasonal plots drawn with forecast::ggseasonplot().
library(ggplot2)
library(forecast)
theme_set(theme_classic())

# Restrict `nottem` to January 1920 through December 1925. The result is
# stored under the name that the plotting call below uses.
nottem_small <- window(nottem, start = c(1920, 1), end = c(1925, 12))

ggseasonplot(AirPassengers) +
  labs(title = "Seasonal plot: International Airline Passengers")

ggseasonplot(nottem_small) +
  labs(title = "Seasonal plot: Air temperatures at Nottingham Castle")
- Groups
# Dendrogram of an average-linkage hierarchical clustering of `USArrests`.
library(ggplot2)
library(ggdendro)
theme_set(theme_bw())

# "ave" selects average linkage in hclust().
hcd <- hclust(dist(USArrests), "ave")

ggdendrogram(hcd, rotate = TRUE, size = 2)
Clusters
# Scatter plot of the first two principal components of the four numeric
# `iris` columns, with each species encircled.
library(ggplot2)
library(ggalt)
library(ggfortify)
theme_set(theme_classic())

# PCA on the four measurement columns only.
daf <- iris[c(1, 2, 3, 4)]
pca_mod <- prcomp(daf)

# Component scores with the species label attached.
daf_pc <- data.frame(pca_mod$x, Species = iris$Species)

# One subset per species, each used for its own encircling layer.
daf_pc_vir <- daf_pc[daf_pc$Species == "virginica", ]
daf_pc_set <- daf_pc[daf_pc$Species == "setosa", ]
daf_pc_ver <- daf_pc[daf_pc$Species == "versicolor", ]

ggplot(daf_pc, aes(PC1, PC2, col = Species)) +
  geom_point(aes(shape = Species), size = 2) +
  labs(
    title = "Iris Clustering",
    subtitle = "With principal components PC1 and PC2 as X and Y axis",
    caption = "Source: Iris"
  ) +
  # Widen the visible range by 20% so the encircling outlines are not cut.
  coord_cartesian(
    xlim = 1.2 * c(min(daf_pc$PC1), max(daf_pc$PC1)),
    ylim = 1.2 * c(min(daf_pc$PC2), max(daf_pc$PC2))
  ) +
  geom_encircle(data = daf_pc_vir, aes(x = PC1, y = PC2)) +
  geom_encircle(data = daf_pc_set, aes(x = PC1, y = PC2)) +
  geom_encircle(data = daf_pc_ver, aes(x = PC1, y = PC2))
- Spatial
# Maps of Chennai with a handful of places marked and encircled.
library(ggplot2)
library(ggmap)
library(ggalt)

# NOTE(review): geocode() and the "google"-sourced qmap() calls query a
# remote service; recent ggmap releases presumably need an API key to be
# registered first. Confirm for the installed version.
ch <- geocode("Chennai")

# The same area in several map styles.
ch_sat_map <- qmap("chennai", zoom = 12, source = "google", maptype = "satellite")
ch_road_map <- qmap("chennai", zoom = 12, source = "google", maptype = "roadmap")
ch_hybrid_map <- qmap("chennai", zoom = 12, source = "google", maptype = "hybrid")
ch_osm_map <- qmap("chennai", zoom = 12, source = "osm")

# Places to mark. They are geocoded from the vector defined here.
ch_places <- c(
  "Kolathur",
  "Washermanpet",
  "Royapettah",
  "Adyar",
  "Guindy"
)
pl_loc <- geocode(ch_places)

# Add the place markers and an outline around them to a base map.
mark_places <- function(base_map) {
  base_map +
    geom_point(
      aes(x = lon, y = lat),
      data = pl_loc,
      alpha = 0.7,
      size = 7,
      color = "tomato"
    ) +
    geom_encircle(
      aes(x = lon, y = lat),
      data = pl_loc, size = 2, color = "blue"
    )
}

mark_places(ch_osm_map)
mark_places(ch_road_map)
mark_places(ch_hybrid_map)
Sample Plot practice:-
# 1) Stacked bar plot: number of cars per gear count in `mtcars`, with each
#    bar split by number of cylinders.
Sample_Numbers <- table(mtcars$cyl, mtcars$gear)

barplot(
  Sample_Numbers,
  main = "Automobile cylinder number\ngears",
  col = c("red", "orange", "steelblue"),
  # Full argument name; `legend` only worked through partial matching.
  legend.text = rownames(Sample_Numbers),
  xlab = "Number of Gears",
  ylab = "count"
)
# 2) Histogram of the `Temp` column of `airquality`.
hist(
  airquality$Temp,
  col = "steelblue",
  main = "Maximum Daily Temperature",
  xlab = "Temperature (degrees Fahrenheit)"
)
# 3) Heatmap of ten random (x, y) points whose rows are shuffled first.
# The seed is set before any random draw so the whole example, not only the
# shuffle, is reproducible.
set.seed(143)

Sample_x <- rnorm(10, mean = rep(1:5, each = 2), sd = 0.7)
Sample_y <- rnorm(10, mean = rep(c(1, 9), each = 5), sd = 0.1)
data <- data.frame(x = Sample_x, y = Sample_y)

# Shuffle the row order, then draw the heatmap from the resulting matrix.
data_Sample <- as.matrix(data)[sample(1:10), ]
heatmap(data_Sample)
# 4) Scatter plot of Ozone against Wind for the September rows of
#    `airquality`.
with(
  subset(airquality, Month == 9),
  plot(Wind, Ozone, col = "steelblue")
)
# The title names the two variables that are actually plotted.
title("Wind and Ozone in NYC in September of 1973")
For Free, Demo classes Call: 8605110150
Registration Link: Click Here!
# 5) Box plot of miles per gallon for each cylinder count.
# `sample_cars` is built from `mtcars` here, with `cyl` converted to a
# factor.
# NOTE(review): the snippet never defined `sample_cars`; `mtcars` is assumed
# from the column names it uses (mpg, cyl). Confirm the intended data set.
sample_cars <- transform(mtcars, cyl = factor(cyl))
class(sample_cars$cyl)

boxplot(
  mpg ~ cyl,
  sample_cars,
  xlab = "Number of\nCylinders",
  ylab = "miles per gallon",
  main = "miles per gallon\nfor varied cylinders in automobiles",
  cex.main = 1.2
)
# 6) Correlation matrix plots.
# The correlations are computed on `mtcars`, whose columns are all numeric;
# cor() needs numeric input.
# NOTE(review): the snippet used an undefined `sample_cars`; `mtcars` is
# assumed. Confirm the intended data set.
library(corrplot)

corr_sample <- cor(mtcars)

corrplot(corr_sample)
# Same matrix shown as numbers, lower triangle only.
corrplot(corr_sample, method = "number", type = "lower")
# 7) Area chart of the mean `Wind` per `Day` value in `airquality`
#    (averaged across the months in which that day number occurs).
library(dplyr)
library(ggplot2)

airquality %>%
  group_by(Day) %>%
  summarise(mean_wind = mean(Wind)) %>%
  ggplot() +
  geom_area(aes(x = Day, y = mean_wind)) +
  labs(
    title = "Area Chart of Average Wind per Day",
    subtitle = "using airquality data",
    y = "Mean Wind"
  )
Author:-
Rahul Pund
Call the Trainer and Book your free demo Class now!!!
© Copyright 2019 | Sevenmentor Pvt Ltd.