library(ggplot2)
# Retrieve the gapminder data. download.file() replaces the shell call to wget
# so the script does not depend on an external program being installed and a
# failed download stops the script instead of being silently ignored. The
# download is skipped when the file is already present, so re-running the
# script does not fetch (and store) another copy.
gap_url <- "http://hwheeler01.github.io/CompBio/gapminder.csv"
gap_file <- "gapminder.csv"
if (!file.exists(gap_file)) {
  download.file(gap_url, destfile = gap_file)
}
# Load the comma-separated file; its first line holds the column names
gap <- read.table(gap_file, sep = ",", header = TRUE)
# Use str() to inspect the structure of the data.frame: its dimensions plus
# the name, type and first few values of every column
str(gap)
## 'data.frame': 1964 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Use summary() to view summary statistics for each column of the data.frame
# (quartiles and mean for numeric columns, length/class for character columns)
summary(gap)
## country continent year lifeExp
## Length:1964 Length:1964 Min. :1952 Min. :23.60
## Class :character Class :character 1st Qu.:1967 1st Qu.:49.94
## Mode :character Mode :character Median :1982 Median :63.30
## Mean :1984 Mean :61.16
## 3rd Qu.:2002 3rd Qu.:72.03
## Max. :2017 Max. :84.80
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.974e+06 1st Qu.: 1358.9
## Median :7.625e+06 Median : 3991.9
## Mean :3.255e+07 Mean : 8529.5
## 3rd Qu.:2.129e+07 3rd Qu.: 10994.3
## Max. :1.420e+09 Max. :113523.1
# View the first few rows of the data.frame
head(gap)
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# View the last few rows of the data.frame
tail(gap)
## country continent year lifeExp pop gdpPercap
## 1959 Zimbabwe Africa 1992 60.377 10704340 693.4208
## 1960 Zimbabwe Africa 1997 46.809 11404948 792.4500
## 1961 Zimbabwe Africa 2002 39.989 11926563 672.0386
## 1962 Zimbabwe Africa 2007 43.487 12311143 469.7093
## 1963 Zimbabwe Africa 2012 54.900 13100000 1850.0000
## 1964 Zimbabwe Africa 2017 61.400 14200000 1910.0000
# To see the whole data.frame in a spreadsheet-style viewer (while in RStudio).
# View() is only useful in an interactive session, so it is skipped when the
# script is run in batch mode (e.g. Rscript or knitting).
if (interactive()) {
  View(gap)
}
# Initialize a plot: only the data and the axis mappings, no layers yet
ggplot(
  gap,
  aes(x = gdpPercap, y = lifeExp)
)
data:image/s3,"s3://crabby-images/fcb04/fcb0414ca617a4313819c68983d9f6665473419c" alt=""
# aes() stands for aesthetics; it is where you tell ggplot what goes on the axes
# Add a point layer to the plot
ggplot(gap, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
data:image/s3,"s3://crabby-images/4d48b/4d48ba27a4fe9bf98b36b13d8327c82e0a2ac083" alt=""
# Log-transform the x-axis
ggplot(gap, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
data:image/s3,"s3://crabby-images/b7fa7/b7fa7ced4418c6a0dacb316ddf0772946aec154a" alt=""
# Add some color: map continent to the point color
ggplot(gap, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10()
data:image/s3,"s3://crabby-images/a4581/a4581139c2200acac65d642166c311b60b899d9e" alt=""
### Life expectancy against year, colored by continent
ggplot(gap, aes(x = year, y = lifeExp, color = continent)) +
  geom_point()
data:image/s3,"s3://crabby-images/ba279/ba279910c4367273eb632ff13ecd487785541827" alt=""
### Same plot, split into one panel per continent
ggplot(gap, aes(x = year, y = lifeExp, color = continent)) +
  geom_point() +
  facet_wrap(~continent)
data:image/s3,"s3://crabby-images/1eed9/1eed90c2d090e259a01f3a2874a9f3732876a2f1" alt=""
### Let's remove Oceania and connect countries with lines
# this requires the package dplyr to filter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep every row whose continent is not Oceania
no_oceania <- dplyr::filter(gap, continent != "Oceania")
# One line per country (group = country), one panel per continent
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)
data:image/s3,"s3://crabby-images/b8fe3/b8fe30708f7bbf9e2a8865975e0bf000f9f25354" alt=""
### Plot only a chosen subset of countries
clist <- c("United States", "Mexico", "Canada")
# Keep the rows whose country appears in clist
subgap <- dplyr::filter(gap, country %in% clist)
# Lines and points together, one color per country
ggplot(subgap, aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  geom_point()
data:image/s3,"s3://crabby-images/c9b21/c9b21bbef8fea6d54756a25a0b83f44f89862705" alt=""
### Boxplots: life expectancy per continent
ggplot(gap, aes(x = continent, y = lifeExp)) +
  geom_boxplot()
data:image/s3,"s3://crabby-images/1cfef/1cfef130a64d13b2a99b129807618063b382a15a" alt=""
### Same boxplot with custom axis labels
ggplot(gap, aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  xlab("Continent") +
  ylab("Life Expectancy (years)")
data:image/s3,"s3://crabby-images/e888b/e888b85d111957f2fa66d797da097dbddb7ccacb" alt=""
### Histograms: distribution of life expectancy with the default binning
ggplot(gap, aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data:image/s3,"s3://crabby-images/8cf39/8cf39348cc57fdc0c090391be6bc9f60e6affe2f" alt=""
### Histogram with an explicit bin width of 1
ggplot(gap, aes(x = lifeExp)) +
  geom_histogram(binwidth = 1)
data:image/s3,"s3://crabby-images/f3b82/f3b828d24b435560c8b20bffe998f9ff841cce24" alt=""
### Density plot of life expectancy
ggplot(gap, aes(x = lifeExp)) +
  geom_density()
data:image/s3,"s3://crabby-images/4a689/4a689f349ca1054d38135d386b0c21757f5352fd" alt=""
### Density plot with one colored curve per continent
ggplot(gap, aes(x = lifeExp, color = continent)) +
  geom_density()
data:image/s3,"s3://crabby-images/e67d6/e67d6a0a6c31b4e52a133205850e544c8d98548e" alt=""
### Faceting works with any geom: histograms, one panel per continent
ggplot(no_oceania, aes(x = lifeExp, fill = continent)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~continent)
data:image/s3,"s3://crabby-images/ed361/ed361709fa0864dcc5768d6de6b57197117ab909" alt=""
# Filled density curves, panels stacked in a single column
ggplot(no_oceania, aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1)
data:image/s3,"s3://crabby-images/505a9/505a9b202348e713afe59d06c5c10284015bc050" alt=""
### Let's play with colors!
# Assign the line colors by hand, one color per continent
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_manual(values = c("red", "purple", "darkgreen", "orange"))
data:image/s3,"s3://crabby-images/35fb9/35fb9a50ff07270b29f3f220556747c9b9b7ac3e" alt=""
### Show the palettes available from the color brewer
# Load RColorBrewer, then display all of its palettes in one chart
library(RColorBrewer)
display.brewer.all()
data:image/s3,"s3://crabby-images/1d778/1d7784bd30545ec484edfedb8261191c9a049f8c" alt=""
### Pick a color brewer palette
# type    = "seq" (sequential), "div" (diverging) or "qual" (qualitative)
# palette = a palette name given as a string (see above), or a number that
#           indexes into the list of palettes of the appropriate type
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "Dark2")
data:image/s3,"s3://crabby-images/0a582/0a58243bf15feb4c3149a75353b081454139a0d7" alt=""
# Fill colors taken from the first sequential brewer palette
ggplot(no_oceania, aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1) +
  scale_fill_brewer(type = "seq", palette = 1)
data:image/s3,"s3://crabby-images/afa10/afa106a5dabbb357ee6d4df54addb204f6a90059" alt=""
### Switch the background theme (here: theme_bw)
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(type = "div", palette = 2) +
  theme_bw()
data:image/s3,"s3://crabby-images/81639/816393d0b42d5195cae1ad5e09e6f9d955763b43" alt=""
# The "PuOr" brewer palette combined with theme_classic
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "PuOr") +
  theme_classic()
data:image/s3,"s3://crabby-images/d1d05/d1d053e781c5de2ca82aab74d76ddf95b13e5a16" alt=""
library(ggthemes)
# Theme and matching color scale named after The Economist
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_economist() +
  scale_color_economist()
data:image/s3,"s3://crabby-images/9d0a8/9d0a811bf944936e99dbf7e96334eef8fbaa1a57" alt=""
# The gdocs theme with its matching color scale
ggplot(
  no_oceania,
  aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_gdocs() +
  scale_color_gdocs()
data:image/s3,"s3://crabby-images/5df0b/5df0bcd18b47854a5d39e220e8f736392251feb7" alt=""