library(ggplot2)
# Retrieve the gapminder data.
# download.file() replaces the external `wget` call so the script does not
# depend on a command-line tool being installed, and a failed download is
# signalled by R instead of being silently ignored.
# The download is skipped when the file is already present; repeated wget
# calls would instead pile up copies named gapminder.csv.1, gapminder.csv.2, ...
gap_url <- "http://hwheeler01.github.io/CompBio/gapminder.csv"
gap_file <- "gapminder.csv"
if (!file.exists(gap_file)) {
  download.file(gap_url, destfile = gap_file, mode = "wb")
}
# Load the data.
# read.csv() applies CSV defaults: only double quotes delimit strings and "#"
# is not a comment character, so an apostrophe inside a country name cannot
# break parsing the way it can with read.table()'s default quote set.
gap <- read.csv(gap_file, header = TRUE)
# Use str() to find out more about the structure of the data.frame:
# one line per column with its type and first few values.
str(gap)
## 'data.frame': 1964 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Use summary() to view summary statistics about each column of the data.frame
summary(gap)
## country continent year lifeExp
## Length:1964 Length:1964 Min. :1952 Min. :23.60
## Class :character Class :character 1st Qu.:1967 1st Qu.:49.94
## Mode :character Mode :character Median :1982 Median :63.30
## Mean :1984 Mean :61.16
## 3rd Qu.:2002 3rd Qu.:72.03
## Max. :2017 Max. :84.80
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.974e+06 1st Qu.: 1358.9
## Median :7.625e+06 Median : 3991.9
## Mean :3.255e+07 Mean : 8529.5
## 3rd Qu.:2.129e+07 3rd Qu.: 10994.3
## Max. :1.420e+09 Max. :113523.1
# View the first few rows
head(gap)
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# View the last few rows
tail(gap)
## country continent year lifeExp pop gdpPercap
## 1959 Zimbabwe Africa 1992 60.377 10704340 693.4208
## 1960 Zimbabwe Africa 1997 46.809 11404948 792.4500
## 1961 Zimbabwe Africa 2002 39.989 11926563 672.0386
## 1962 Zimbabwe Africa 2007 43.487 12311143 469.7093
## 1963 Zimbabwe Africa 2012 54.900 13100000 1850.0000
## 1964 Zimbabwe Africa 2017 61.400 14200000 1910.0000
# To see the whole data.frame (while in RStudio).
# View() opens a spreadsheet-style viewer, which needs an interactive session;
# guard it so the script can still be run with Rscript or knitted.
if (interactive()) {
  View(gap)
}
# Initialize a plot. aes() stands for "aesthetics": it is where ggplot is told
# which columns go on which axes. No layer has been added yet.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp))
# Add a layer of points to get a scatter plot
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
# Log-transform the x-axis
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()
# Map continent to point color
ggplot(
  data = gap,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()
### Life expectancy over time, with points colored by continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point()
### Same plot, split into one panel per continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point() +
  facet_wrap(~continent)
### Let's remove Oceania and connect countries with lines
# this requires the package dplyr to filter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the rows whose continent is not "Oceania"
no_oceania <- dplyr::filter(gap, continent != "Oceania")
# group = country draws one line per country; color and panels follow continent
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent)
### Plot a hand-picked subset of countries
# Names of the countries to keep
clist <- c("United States", "Mexico", "Canada")
# Rows of gap whose country appears in clist
subgap <- dplyr::filter(gap, country %in% clist)
# Lines with points overlaid, one color per country
ggplot(
  data = subgap,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()
### Boxplots: distribution of life expectancy within each continent
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()
### Same boxplot with custom axis labels
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  labs(x = "Continent", y = "Life Expectancy (years)")
### Histogram of life expectancy (default binning)
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
### Histogram with an explicit bin width of 1
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 1)
### Density plot of life expectancy
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_density()
### One density curve per continent, distinguished by outline color
ggplot(data = gap, mapping = aes(x = lifeExp, color = continent)) +
  geom_density()
### Faceting works with any geom
# Histograms (bin width 1), one panel per continent
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~continent)
# Densities, panels stacked in a single column
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1)
### Let's play with colors!
# (This heading was bare text before, which is a syntax error that prevents
# the whole file from being parsed; it is now a comment.)
# scale_color_manual() assigns the listed colors to the continents shown.
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_manual(values = c("red", "purple", "darkgreen", "orange"))
### Show every palette offered by the color brewer
library(RColorBrewer)
display.brewer.all()
### Pick a color brewer palette for a plot
# type:    one of "seq" (sequential), "div" (diverging) or "qual" (qualitative)
# palette: a string selects the palette with that name (see the display above);
#          a number indexes into the list of palettes of the chosen type
# Named palette applied to the line colors
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "Dark2")
# First sequential palette applied to the density fills
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~continent, ncol = 1) +
  scale_fill_brewer(type = "seq", palette = 1)
### Change the background theme
# Black-and-white theme with the second diverging brewer palette
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(type = "div", palette = 2) +
  theme_bw()
# Classic theme with the "PuOr" brewer palette
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  scale_color_brewer(palette = "PuOr") +
  theme_classic()
# ggthemes supplies additional themes together with matching color scales
library(ggthemes)
# Economist theme and colors
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_economist() +
  scale_color_economist()
# Google Docs theme and colors
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~continent) +
  theme_gdocs() +
  scale_color_gdocs()