import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the CSV with default parsing options to inspect its raw layout.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\yunge\Downloads\multiTimeline (1).csv",
)
df.head(n=5)
Category: All categories | ||||
---|---|---|---|---|
Week | covid: (Malaysia) | hospital: (Malaysia) | vaccine: (Malaysia) | lockdown: (Malaysia) |
2016-07-24 | 0 | 6 | <1 | 0 |
2016-07-31 | 0 | 6 | <1 | <1 |
2016-08-07 | 0 | 6 | <1 | 0 |
2016-08-14 | 0 | 5 | <1 | <1 |
# The file starts with a "Category: All categories" banner line instead of
# column names; skip that one line so the real header row is picked up.
df = pd.read_csv(
    r"C:\Users\yunge\Downloads\multiTimeline (1).csv",
    skiprows=1,
)
df.head(n=5)
Week | covid: (Malaysia) | hospital: (Malaysia) | vaccine: (Malaysia) | lockdown: (Malaysia) | |
---|---|---|---|---|---|
0 | 2016-07-24 | 0 | 6 | <1 | 0 |
1 | 2016-07-31 | 0 | 6 | <1 | <1 |
2 | 2016-08-07 | 0 | 6 | <1 | 0 |
3 | 2016-08-14 | 0 | 5 | <1 | <1 |
4 | 2016-08-21 | 0 | 5 | <1 | 0 |
# Swap the verbose "<keyword>: (Malaysia)" headers for short column names.
df.columns = [
    'Week',
    'Covid-19',
    'Hospital',
    'Vaccine',
    'Lockdown',
]
df.head(n=5)
Week | Covid-19 | Hospital | Vaccine | Lockdown | |
---|---|---|---|---|---|
0 | 2016-07-24 | 0 | 6 | <1 | 0 |
1 | 2016-07-31 | 0 | 6 | <1 | <1 |
2 | 2016-08-07 | 0 | 6 | <1 | 0 |
3 | 2016-08-14 | 0 | 5 | <1 | <1 |
4 | 2016-08-21 | 0 | 5 | <1 | 0 |
# Inspect the dtype of every column to see which ones still need conversion.
df.dtypes
Week object Covid-19 int64 Hospital int64 Vaccine object Lockdown object dtype: object
# Change the Week column's data type from object to datetime.
from datetime import datetime
df['Week'] = pd.to_datetime(df['Week'])
df.dtypes
Week datetime64[ns] Covid-19 int64 Hospital int64 Vaccine object Lockdown object dtype: object
# Use Week as the index so the rows are keyed by date for the time series
# analysis that follows.
df = df.set_index('Week')
df.head(n=5)
Covid-19 | Hospital | Vaccine | Lockdown | |
---|---|---|---|---|
Week | ||||
2016-07-24 | 0 | 6 | <1 | 0 |
2016-07-31 | 0 | 6 | <1 | <1 |
2016-08-07 | 0 | 6 | <1 | 0 |
2016-08-14 | 0 | 5 | <1 | <1 |
2016-08-21 | 0 | 5 | <1 | 0 |
# Treat the "<1" placeholder as 0 in every keyword column.
for column in ('Covid-19', 'Lockdown', 'Hospital', 'Vaccine'):
    df[column] = df[column].replace('<1', 0)
df.head(n=5)
Covid-19 | Hospital | Vaccine | Lockdown | |
---|---|---|---|---|
Week | ||||
2016-07-24 | 0 | 6 | 0 | 0 |
2016-07-31 | 0 | 6 | 0 | 0 |
2016-08-07 | 0 | 6 | 0 | 0 |
2016-08-14 | 0 | 5 | 0 | 0 |
2016-08-21 | 0 | 5 | 0 | 0 |
# Display the five most recent rows.
df.tail(n=5)
Covid-19 | Hospital | Vaccine | Lockdown | |
---|---|---|---|---|
Week | ||||
2021-06-13 | 27 | 8 | 12 | 1 |
2021-06-20 | 36 | 6 | 22 | 1 |
2021-06-27 | 28 | 7 | 12 | 2 |
2021-07-04 | 34 | 7 | 16 | 1 |
2021-07-11 | 42 | 7 | 20 | 1 |
# Re-check the column dtypes after the "<1" replacement.
df.dtypes
Covid-19 int64 Hospital int64 Vaccine object Lockdown object dtype: object
# Cast the remaining object columns to integers.
# astype returns a new object instead of modifying in place, so the result
# has to be assigned back for the dtype change to persist.
df = df.astype({'Vaccine': int, 'Lockdown': int})
df.dtypes
Covid-19 int64 Hospital int64 Vaccine int32 Lockdown int32 dtype: object
# Smooth the Covid-19 series with a 12-point rolling (moving) average to
# reveal its trend.
# More details on rolling averages:
# https://www.geeksforgeeks.org/how-to-make-a-time-series-plot-with-rolling-average-in-python/
covid = df['Covid-19']
covid_ax = covid.rolling(window=12).mean().plot(
    figsize=(20, 10),
    linewidth=2,
    fontsize=20,
    color='red',
)
covid_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')
# Lockdown trend: 12-point rolling average.
lockdown = df['Lockdown']
lockdown_ax = lockdown.rolling(window=12).mean().plot(
    figsize=(20, 10),
    linewidth=2,
    fontsize=20,
    color='blue',
)
lockdown_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')
# Vaccine trend: 12-point rolling average.
vaccine = df['Vaccine']
vaccine_ax = vaccine.rolling(window=12).mean().plot(
    figsize=(20, 10),
    linewidth=2,
    fontsize=20,
    color='green',
)
vaccine_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')
# Hospital trend: 12-point rolling average.
hospital = df['Hospital']
hospital_ax = hospital.rolling(window=12).mean().plot(
    figsize=(20, 10),
    linewidth=2,
    fontsize=20,
    color='orange',
)
hospital_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')
# Draw all four keyword series on one shared chart for direct comparison.
combined_ax = df.plot(figsize=(20, 10), linewidth=2, fontsize=20)
combined_ax.set_xlabel('Year', fontsize=20)
combined_ax.set_title("Malaysia's Covid-19 Keywords Comparison", fontsize=30)
combined_ax.legend(fontsize=20)
<matplotlib.legend.Legend at 0x21172c49fa0>
# Differencing (each value minus the previous one, via diff()) removes the
# trend from a time series so that seasonality can be investigated.
# Hospital seasonality
hospital_diff_ax = hospital.diff(periods=1).plot(
    figsize=(20, 10),
    linewidth=2,
    fontsize=20,
    color='orange',
)
hospital_diff_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')
# Pairwise Pearson correlation between the keyword columns.
df.corr(method='pearson')
Covid-19 | Hospital | Vaccine | Lockdown | |
---|---|---|---|---|
Covid-19 | 1.000000 | 0.059998 | 0.516606 | 0.759067 |
Hospital | 0.059998 | 1.000000 | 0.094555 | 0.170065 |
Vaccine | 0.516606 | 0.094555 | 1.000000 | 0.370005 |
Lockdown | 0.759067 | 0.170065 | 0.370005 | 1.000000 |
# Plot the first-order difference of every keyword column on one chart.
all_diff_ax = df.diff(periods=1).plot(figsize=(20, 10), linewidth=2, fontsize=20)
all_diff_ax.set_xlabel('Year', fontsize=20)
Text(0.5, 0, 'Year')