Chapter 6 Time series
Almost all economic, financial, and business-related data sets are measured over time (e.g. revenues, profits, margins, and stock prices).
Therefore, the ability to work with — and manipulate — dates and times is critical when using Python for analysis.
The book Coding for Economists has a wonderful explanation of the basics behind time series, time zones, and creating a date object from a string input.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import statistics
from scipy.stats import norm
from matplotlib.ticker import EngFormatter, StrMethodFormatter
# Load the country-level GDP data set (one row per country per year).
gdp = pd.read_csv('data/gdp_1960_2020.csv')

# The GDP numbers here are very long. To make them easier to read we can
# divide the GDP number by 1 billion.
gdp['gdp_billions'] = gdp['gdp'] / 1_000_000_000

# Convert the year to datetime so it can be used as a time axis.
gdp['date'] = pd.to_datetime(gdp['year'], format='%Y')

# Filter for Australia
aus_gdp = gdp[gdp.country == "Australia"]

# Show the last few rows as a quick sanity check (displayed in a notebook).
aus_gdp.tail()
Now that we’ve imported, filtered, and set up the data, let’s put it on a chart.
# Build the line chart of Australian GDP and style its axes.

# Setup plot size.
fig, ax = plt.subplots(figsize=(8, 4))

# Create grid
# Zorder tells it which layer to put it on. We are setting this to 1 and our
# data to 2 so the grid is behind the data.
ax.grid(which="major", axis='y', color='#758D99', alpha=0.6, zorder=1)

# Plot data
ax.plot(
    aus_gdp['date'],
    aus_gdp['gdp_billions'],
    color='#006BA2',
    linewidth=2,
)

# Remove spines. Can be done one at a time or can slice with a list.
ax.spines[['top', 'right', 'left']].set_visible(False)

# Shrink y-lim to make plot a bit tighter
ax.set_ylim(0, 1950)

# Set xlim to fit data without going over plot area.
# pd.Timestamp is used because the pd.datetime alias was removed in pandas 2.0.
ax.set_xlim(pd.Timestamp(1960, 1, 1), pd.Timestamp(2020, 1, 1))

# Reformat x-axis tick labels
ax.xaxis.set_tick_params(labelsize=11)  # Set tick label size

# Reformat y-axis tick labels.
# Fix the tick positions first: set_yticklabels only maps labels onto ticks
# reliably when the tick locations have been set explicitly.
y_ticks = np.arange(0, 2000, 250)
ax.set_yticks(y_ticks)
ax.set_yticklabels(
    y_ticks,                     # Set labels again
    ha='left',                   # Left-align the label text
    verticalalignment='bottom',  # Make labels sit on top of the gridline
)

# NOTE(review): labeltop/labelbottom/bottom are x-axis-style keywords applied
# to the y-axis here, and tick_left() is called immediately afterwards —
# confirm which of these settings is actually intended to take effect.
ax.yaxis.set_tick_params(
    pad=2,              # Pad tick labels so they don't go over y-axis
    labeltop=True,
    labelbottom=False,
    bottom=False,
    labelsize=11,       # Set tick label size
)

ax.yaxis.tick_left()

# Render each y tick value as whole billions of dollars, e.g. "$250bn".
# This replaces the label text set by set_yticklabels above.
ax.yaxis.set_major_formatter('${x:1.0f}bn')
# Annotate the chart (accent line, title, subtitle, source) and export it.

# Add the red accent line across the top of the figure.
ax.plot(
    [0.12, .9],                 # x-coordinates of the line (figure fractions)
    [.98, .98],                 # y-coordinates of the line (figure fractions)
    transform=fig.transFigure,  # Set location relative to the figure
    clip_on=False,              # Allow drawing outside the axes area
    color='#E3120B',
    linewidth=.6,
)

# Add in title and subtitle
ax.text(x=0.12, y=.93, s="Up, up, and away", transform=fig.transFigure,
        ha='left', fontsize=13, weight='bold', alpha=.8)
ax.text(x=0.12, y=.88,
        s="Australia's GDP growth, in billions of USD, 1960-2020",
        transform=fig.transFigure, ha='left', fontsize=11, alpha=.8)

# Set source text
ax.text(x=0.12, y=0.01, s="""Source: Kaggle GDP data (1960-2020)""",
        transform=fig.transFigure, ha='left', fontsize=9, alpha=.7)

# Export plot as high resolution PNG
plt.savefig(
    'docs/Aus_line.png',   # Set path and filename
    dpi=300,               # Set dots per inch
    bbox_inches="tight",   # Remove extra whitespace around plot
    facecolor='white',     # Set background color to white
)

plt.show()
6.1 Australian economic data
Our central agencies (e.g. Treasury and RBA) certainly don’t make it easy to work with economic data.
The easiest way (even in 2022) is to download poorly formatted CSV files. You can read a bit more about these methods here.
6.2 Seasonally adjusted data
Chad Fulton has written a superb explanation of why time series need to be seasonally adjusted (including handling outliers like Christmas Day), using the New York City COVID-19 daily case number data set.