Open Anaconda Prompt (Windows) or Terminal (Mac) and type:
pip install ggplot
Next, start Jupyter by typing:
jupyter notebook
### Import needed modules
#ggplot is used for plotting and is based on the R package ggplot2
#Pandas is a widely-used Python library for statistics, particularly on tabular data. It is installed with Anaconda.
#We will use it to read and write tables in .csv format.
from ggplot import *
import pandas
Download the data from http://hwheeler01.github.io/comp150/ggplot/gapminder.csv
and save it in the same folder as your notebook (the .ipynb
file).
### read in the gapminder data as a pandas DataFrame
# The relative path means 'gapminder.csv' is looked up in the current working directory.
gap = pandas.read_csv('gapminder.csv')

### DataFrame.info prints a summary of the DataFrame
gap.info()

### DataFrame.head returns the first few rows
print(gap.head())

### Pass a row count to see more of them
print(gap.head(20))

### DataFrame.tail returns the last few rows
print(gap.tail())

### DataFrame.describe gives summary statistics for the continuous columns
print(gap.describe())
### Start a plot: just the data and the aesthetic mappings, no layers yet
ggplot(gap, aes(x='gdpPercap', y='lifeExp'))

### Add a layer of points
ggplot(gap, aes(x='gdpPercap', y='lifeExp')) + geom_point()

### Log transform the x-axis
(ggplot(gap, aes(x='gdpPercap', y='lifeExp'))
 + geom_point()
 + scale_x_log())

### Color the points by continent
(ggplot(gap, aes(x='gdpPercap', y='lifeExp', color='continent'))
 + geom_point()
 + scale_x_log())

### lifeExp vs. year, colored by continent
ggplot(gap, aes(x='year', y='lifeExp', color='continent')) + geom_point()

### Same plot, split into one panel per continent
(ggplot(gap, aes(x='year', y='lifeExp', color='continent'))
 + geom_point()
 + facet_wrap('continent'))
### Drop the Oceania rows, then draw one line per country (group = 'country')
no_oceania = gap[gap['continent'] != 'Oceania']
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent'))

### Keep only countries whose name matches one of the alternatives in the pattern
# str.contains treats the string as a regular expression, so '|' means "or".
subgap = gap[gap['country'].str.contains("Cambodia|Rwanda|United|Japan|Mexico")]
(ggplot(subgap, aes(x='year', y='lifeExp', color='country'))
 + geom_line()
 + geom_point())
### Boxplots of lifeExp for each continent
ggplot(gap, aes(x='continent', y='lifeExp')) + geom_boxplot()

### The same boxplots with custom axis labels
(ggplot(gap, aes(x='continent', y='lifeExp'))
 + geom_boxplot()
 + xlab("Continent")
 + ylab("Life Expectancy (years)"))

### Histogram of lifeExp
ggplot(gap, aes(x='lifeExp')) + geom_histogram()
### we can adjust binwidth
# Draw the lifeExp histogram once for each binwidth in 1, 2, 3.
# Inside a loop a bare expression is not displayed, so each plot is
# passed to print() explicitly.
for i in range(1, 4):
    print("binwidth = " + str(i))
    print(ggplot(gap, aes(x = 'lifeExp')) + geom_histogram(binwidth = i))
    print("\n")
### Density plot of lifeExp
ggplot(gap, aes(x='lifeExp')) + geom_density()

### One density curve per continent, distinguished by color
ggplot(gap, aes(x='lifeExp', color='continent')) + geom_density()

### Faceting works with any geom: histograms split by continent
(ggplot(no_oceania, aes(x='lifeExp', color='continent'))
 + geom_histogram(binwidth=1)
 + facet_wrap('continent'))
### Choose the colors by hand
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_manual(values=['red', 'purple', 'teal', 'orange']))

# NOTE(review): print_maps() presumably lists the available brewer palettes -- confirm.
import brewer2mpl
brewer2mpl.print_maps()

# Brewer palette: type 'seq', palette 'Greens'
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_brewer(type='seq', palette='Greens'))

# Brewer palette: type 'div', palette 'PuOr'
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_brewer(type='div', palette='PuOr'))

# Brewer palette: type 'qual', palette 'Pastel1'
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_brewer(type='qual', palette='Pastel1'))

# Brewer palette: type 'qual', palette 'Dark2'
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_brewer(type='qual', palette='Dark2'))
### Swap the background theme by adding theme_bw() to the plot
(ggplot(no_oceania, aes(x='year', y='lifeExp', color='continent', group='country'))
 + geom_line()
 + facet_wrap('continent')
 + scale_color_brewer(type='qual', palette='Dark2')
 + theme_bw())