In [1]:
import pandas as pd
In [2]:
# Example 1 year of quarterly expenditure series that is downloaded from ONS.
d = {
'date': ['1970-01-01', '1970-04-01', '1970-07-01', '1970-10-01'],
'value': [30, 60, 120, 240],
'fin_quarter': [1, 2, 3, 4]}
df = pd.DataFrame(data=d)
In [3]:
df
Out[3]:
date | value | fin_quarter | |
---|---|---|---|
0 | 1970-01-01 | 30 | 1 |
1 | 1970-04-01 | 60 | 2 |
2 | 1970-07-01 | 120 | 3 |
3 | 1970-10-01 | 240 | 4 |
In [4]:
df['quarter_date'] = pd.to_datetime(df['date'])
df['fin_quarter'] = df['quarter_date'].dt.quarter.astype(str)
df['value'] = pd.to_numeric(df['value'], downcast='float')
In [5]:
quarters = {'1': ['01-01','02-01','03-01'], '2': ['04-01','05-01','06-01'],
'3': ['07-01','08-01','09-01'], '4': ['10-01', '11-01', '12-01']
}
In [6]:
df_monthly = (df
.assign(month_value=lambda x: x['value'] / 3,
month=lambda x: x['fin_quarter'].map(quarters),
year=lambda x: x['quarter_date'].dt.year.astype(str)
)
.explode('month')
)
Create new column month_date
from year and month.
In [7]:
df_monthly['month_date'] = df_monthly['year'] + '-' + df_monthly['month']
In [8]:
df_monthly = df_monthly.reset_index(drop=True)
df_monthly['quarter_date'] = df_monthly['quarter_date'].astype(str)
In [9]:
df_monthly = df_monthly.filter(items=['quarter_date', 'value', 'month_value', 'month_date'])
In [10]:
df_monthly
Out[10]:
quarter_date | value | month_value | month_date | |
---|---|---|---|---|
0 | 1970-01-01 | 30.0 | 10.0 | 1970-01-01 |
1 | 1970-01-01 | 30.0 | 10.0 | 1970-02-01 |
2 | 1970-01-01 | 30.0 | 10.0 | 1970-03-01 |
3 | 1970-04-01 | 60.0 | 20.0 | 1970-04-01 |
4 | 1970-04-01 | 60.0 | 20.0 | 1970-05-01 |
5 | 1970-04-01 | 60.0 | 20.0 | 1970-06-01 |
6 | 1970-07-01 | 120.0 | 40.0 | 1970-07-01 |
7 | 1970-07-01 | 120.0 | 40.0 | 1970-08-01 |
8 | 1970-07-01 | 120.0 | 40.0 | 1970-09-01 |
9 | 1970-10-01 | 240.0 | 80.0 | 1970-10-01 |
10 | 1970-10-01 | 240.0 | 80.0 | 1970-11-01 |
11 | 1970-10-01 | 240.0 | 80.0 | 1970-12-01 |