library(ggplot2)

# Retrieve the gapminder data.
# download.file() is used instead of shelling out to wget so the script does
# not depend on an external program, and the download is skipped when the
# file is already present so that re-running the script does not fetch it again.
if (!file.exists("gapminder.csv")) {
  download.file("http://hwheeler01.github.io/CompBio/gapminder.csv",
                destfile = "gapminder.csv")
}
# Load the data: comma-separated, with the first line used as column names.
# NOTE(review): read.table() treats both ' and " as quote characters by
# default; if any field contains an apostrophe, read.csv() may parse the file
# differently -- confirm against the data before switching.
gap <- read.table("gapminder.csv", sep = ",", header = TRUE)
# Compactly display the structure of the data.frame (columns and their types)
utils::str(gap)
## 'data.frame':    1964 obs. of  6 variables:
##  $ country  : chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ continent: chr  "Asia" "Asia" "Asia" "Asia" ...
##  $ year     : int  1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num  28.8 30.3 32 34 36.1 ...
##  $ pop      : num  8425333 9240934 10267083 11537966 13079460 ...
##  $ gdpPercap: num  779 821 853 836 740 ...
# Print per-column summary statistics for the data.frame
summary(object = gap)
##    country           continent              year         lifeExp     
##  Length:1964        Length:1964        Min.   :1952   Min.   :23.60  
##  Class :character   Class :character   1st Qu.:1967   1st Qu.:49.94  
##  Mode  :character   Mode  :character   Median :1982   Median :63.30  
##                                        Mean   :1984   Mean   :61.16  
##                                        3rd Qu.:2002   3rd Qu.:72.03  
##                                        Max.   :2017   Max.   :84.80  
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.974e+06   1st Qu.:  1358.9  
##  Median :7.625e+06   Median :  3991.9  
##  Mean   :3.255e+07   Mean   :  8529.5  
##  3rd Qu.:2.129e+07   3rd Qu.: 10994.3  
##  Max.   :1.420e+09   Max.   :113523.1
# Show the first six rows (six is head()'s default)
head(gap, n = 6L)
##       country continent year lifeExp      pop gdpPercap
## 1 Afghanistan      Asia 1952  28.801  8425333  779.4453
## 2 Afghanistan      Asia 1957  30.332  9240934  820.8530
## 3 Afghanistan      Asia 1962  31.997 10267083  853.1007
## 4 Afghanistan      Asia 1967  34.020 11537966  836.1971
## 5 Afghanistan      Asia 1972  36.088 13079460  739.9811
## 6 Afghanistan      Asia 1977  38.438 14880372  786.1134
# Show the last six rows (six is tail()'s default)
tail(gap, n = 6L)
##       country continent year lifeExp      pop gdpPercap
## 1959 Zimbabwe    Africa 1992  60.377 10704340  693.4208
## 1960 Zimbabwe    Africa 1997  46.809 11404948  792.4500
## 1961 Zimbabwe    Africa 2002  39.989 11926563  672.0386
## 1962 Zimbabwe    Africa 2007  43.487 12311143  469.7093
## 1963 Zimbabwe    Africa 2012  54.900 13100000 1850.0000
## 1964 Zimbabwe    Africa 2017  61.400 14200000 1910.0000
# To see the whole data.frame in the data viewer (e.g. while in RStudio).
# Guarded with interactive() so the script can also be run non-interactively
# (Rscript, R CMD BATCH), where a data viewer may not be available.
if (interactive()) {
  View(gap)
}
# Start with an empty plot: only the data and the axis mappings are declared.
# aes() (short for "aesthetics") is where ggplot is told what goes on each axis.
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp))

# Add a layer of points
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Log-transform the x-axis
ggplot(data = gap, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

# Map continent to the point color
ggplot(
  data = gap,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

### Life expectancy over time, with points colored by continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point()

### Same plot, split into one panel per continent
ggplot(
  data = gap,
  mapping = aes(x = year, y = lifeExp, color = continent)
) +
  geom_point() +
  facet_wrap(~ continent)

### Let's remove Oceania and connect countries with lines
# this requires the package dplyr to filter
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep every row whose continent is not Oceania
no_oceania <- dplyr::filter(gap, continent != "Oceania")

# One line per country (group = country), colored and faceted by continent
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent)

### Plot only a hand-picked set of countries
clist <- c(
  "United States",
  "Mexico",
  "Canada"
)
# Keep only the rows whose country is in the list above
subgap <- dplyr::filter(gap, country %in% clist)
# Lines with points overlaid, one color per country
ggplot(data = subgap, mapping = aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  geom_point()

### Boxplots of life expectancy, one box per continent
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()

### The same boxplots with custom axis labels
ggplot(data = gap, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot() +
  labs(x = "Continent", y = "Life Expectancy (years)")

### Histogram of life expectancy using the default binning
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

### Histogram with an explicit bin width of 1
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_histogram(binwidth = 1)

### Density plot of life expectancy
ggplot(data = gap, mapping = aes(x = lifeExp)) +
  geom_density()

### One density curve per continent, distinguished by line color
ggplot(data = gap, mapping = aes(x = lifeExp, color = continent)) +
  geom_density()

### Faceting works with any geom
# Histograms filled by continent, one panel per continent
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~ continent)

# Filled density curves, panels stacked in a single column
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~ continent, ncol = 1)

### Let's play with colors!

# Per-country lines, with a hand-picked color supplied for each continent
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  scale_color_manual(values = c("red", "purple", "darkgreen", "orange"))

### Display all of the palettes offered by the color brewer
library(RColorBrewer)
RColorBrewer::display.brewer.all()

### Pick a color brewer palette

# type    = one of "seq" (sequential), "div" (diverging) or "qual" (qualitative)
# palette = a string selects the palette with that name (see the list above);
#           a number indexes into the palettes of the chosen type

# Palette chosen by name
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  scale_color_brewer(palette = "Dark2")

# Palette chosen by type and index
ggplot(data = no_oceania, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density() +
  facet_wrap(~ continent, ncol = 1) +
  scale_fill_brewer(type = "seq", palette = 1)

### Swap the background theme
# theme_bw() with the diverging palette at index 2
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  scale_color_brewer(type = "div", palette = 2) +
  theme_bw()

# theme_classic() with the "PuOr" palette
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  scale_color_brewer(palette = "PuOr") +
  theme_classic()

# ggthemes supplies additional themes together with matching color scales
library(ggthemes)

# Economist theme and color scale
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  theme_economist() +
  scale_color_economist()

# Google Docs theme and color scale
ggplot(
  data = no_oceania,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  facet_wrap(~ continent) +
  theme_gdocs() +
  scale_color_gdocs()