import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
df=pd.read_csv(r'C:\Users\yunge\Desktop\Data Science\Python\Data Analysis Projects\Hotel Booking Analysis\hotel_bookings.csv')
df.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
df.shape
(119390, 32)
df.isnull().values.any()
True
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
# Fill the null values with 0
df.fillna(0,inplace=True)
df.isna().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 0 company 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df['meal'].value_counts()
BB 92310 HB 14463 SC 10650 Undefined 1169 FB 798 Name: meal, dtype: int64
df['meal'].unique()
array(['BB', 'FB', 'HB', 'SC', 'Undefined'], dtype=object)
df['adults'].unique()
array([ 2, 1, 3, 4, 40, 26, 50, 27, 55, 0, 20, 6, 5, 10], dtype=int64)
df['children'].unique()
array([ 0., 1., 2., 10., 3.])
df['babies'].unique()
array([ 0, 1, 2, 10, 9], dtype=int64)
# There are 180 rows which have 0 as entry for all 3 columns 'adults, children and babies', indicating these are wrong entries as there are no guest who checked into the hotel
filter=(df['adults']==0) & (df['children']==0) & (df['babies']==0)
df[filter]
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2224 | Resort Hotel | 0 | 1 | 2015 | October | 41 | 6 | 0 | 3 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 10/6/2015 |
2409 | Resort Hotel | 0 | 0 | 2015 | October | 42 | 12 | 0 | 0 | 0 | ... | No Deposit | 0.0 | 174.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 10/12/2015 |
3181 | Resort Hotel | 0 | 36 | 2015 | November | 47 | 20 | 1 | 2 | 0 | ... | No Deposit | 38.0 | 0.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 11/23/2015 |
3684 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 1 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/4/2016 |
3708 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 2 | 4 | 0 | ... | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/5/2016 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
115029 | City Hotel | 0 | 107 | 2017 | June | 26 | 27 | 0 | 3 | 0 | ... | No Deposit | 7.0 | 0.0 | 0 | Transient | 100.80 | 0 | 0 | Check-Out | 6/30/2017 |
115091 | City Hotel | 0 | 1 | 2017 | June | 26 | 30 | 0 | 1 | 0 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 1 | 1 | Check-Out | 7/1/2017 |
116251 | City Hotel | 0 | 44 | 2017 | July | 28 | 15 | 1 | 1 | 0 | ... | No Deposit | 425.0 | 0.0 | 0 | Transient | 73.80 | 0 | 0 | Check-Out | 7/17/2017 |
116534 | City Hotel | 0 | 2 | 2017 | July | 28 | 15 | 2 | 5 | 0 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient-Party | 22.86 | 0 | 1 | Check-Out | 7/22/2017 |
117087 | City Hotel | 0 | 170 | 2017 | July | 30 | 27 | 0 | 2 | 0 | ... | No Deposit | 52.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/29/2017 |
180 rows × 32 columns
# There are 119,210 rows left after calling the filter; ~ as negation
df[~filter]
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.00 | 0 | 1 | Check-Out | 7/3/2015 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
119385 | City Hotel | 0 | 23 | 2017 | August | 35 | 30 | 2 | 5 | 2 | ... | No Deposit | 394.0 | 0.0 | 0 | Transient | 96.14 | 0 | 0 | Check-Out | 9/6/2017 |
119386 | City Hotel | 0 | 102 | 2017 | August | 35 | 31 | 2 | 5 | 3 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 225.43 | 0 | 2 | Check-Out | 9/7/2017 |
119387 | City Hotel | 0 | 34 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 157.71 | 0 | 4 | Check-Out | 9/7/2017 |
119388 | City Hotel | 0 | 109 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 89.0 | 0.0 | 0 | Transient | 104.40 | 0 | 0 | Check-Out | 9/7/2017 |
119389 | City Hotel | 0 | 205 | 2017 | August | 35 | 29 | 2 | 7 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 151.20 | 0 | 2 | Check-Out | 9/7/2017 |
119210 rows × 32 columns
# This will be our processed data and we will be using this dataset for further analysis
data=df[~filter]
data.shape
(119210, 32)
df['hotel'].unique()
array(['Resort Hotel', 'City Hotel'], dtype=object)
df['is_canceled'].unique()
array([0, 1], dtype=int64)
resort=data[(data['hotel']=='Resort Hotel') & (data['is_canceled']==0)]
city=data[(data['hotel']=='City Hotel') & (data['is_canceled']==0)]
resort.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
city.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
40060 | City Hotel | 0 | 6 | 2015 | July | 27 | 1 | 0 | 2 | 1 | ... | No Deposit | 6.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/3/2015 |
40066 | City Hotel | 0 | 3 | 2015 | July | 27 | 2 | 0 | 3 | 1 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 58.67 | 0 | 0 | Check-Out | 7/5/2015 |
40070 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
40071 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 43.00 | 0 | 0 | Check-Out | 7/5/2015 |
40072 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | ... | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
5 rows × 32 columns
resort.shape
(28927, 32)
city.shape
(46084, 32)
pip install plotly
Requirement already satisfied: plotly in c:\users\yunge\anaconda3\lib\site-packages (5.1.0) Requirement already satisfied: six in c:\users\yunge\anaconda3\lib\site-packages (from plotly) (1.15.0) Requirement already satisfied: tenacity>=6.2.0 in c:\users\yunge\anaconda3\lib\site-packages (from plotly) (8.0.1) Note: you may need to restart the kernel to use updated packages.
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.express as px
labels=resort['country'].value_counts().index
values=resort['country'].value_counts()
fig=px.pie(values=values,
names=labels,
title="Home Country of Guests for Resort Hotel",
template="seaborn")
fig.update_traces(textposition="inside", textinfo="value+percent+label")
fig.show()
labels2=city['country'].value_counts().index
values2=city['country'].value_counts()
fig=px.pie(values=values2,
names=labels2,
title="Home Country of Guests for City Hotel",
template="seaborn")
fig.update_traces(textposition="inside", textinfo="value+percent+label")
fig.show()
# A dataframe where is_canceled equals 0, including both resort hotel and city hotel
data[data['is_canceled']==0]
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.00 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.00 | 0 | 1 | Check-Out | 7/3/2015 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
119385 | City Hotel | 0 | 23 | 2017 | August | 35 | 30 | 2 | 5 | 2 | ... | No Deposit | 394.0 | 0.0 | 0 | Transient | 96.14 | 0 | 0 | Check-Out | 9/6/2017 |
119386 | City Hotel | 0 | 102 | 2017 | August | 35 | 31 | 2 | 5 | 3 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 225.43 | 0 | 2 | Check-Out | 9/7/2017 |
119387 | City Hotel | 0 | 34 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 157.71 | 0 | 4 | Check-Out | 9/7/2017 |
119388 | City Hotel | 0 | 109 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | No Deposit | 89.0 | 0.0 | 0 | Transient | 104.40 | 0 | 0 | Check-Out | 9/7/2017 |
119389 | City Hotel | 0 | 205 | 2017 | August | 35 | 29 | 2 | 7 | 2 | ... | No Deposit | 9.0 | 0.0 | 0 | Transient | 151.20 | 0 | 2 | Check-Out | 9/7/2017 |
75011 rows × 32 columns
# Currently country used as index
data[data['is_canceled']==0]['country'].value_counts()
PRT 20977 GBR 9668 FRA 8468 ESP 6383 DEU 6067 ... ZMB 1 GUY 1 BWA 1 MRT 1 BHR 1 Name: country, Length: 166, dtype: int64
# Reset index
country_data=data[data['is_canceled']==0]['country'].value_counts().reset_index()
country_data.head()
index | country | |
---|---|---|
0 | PRT | 20977 |
1 | GBR | 9668 |
2 | FRA | 8468 |
3 | ESP | 6383 |
4 | DEU | 6067 |
# Renaming columns of country_data
country_data.columns=['Country','Number of Guests']
country_data.head()
Country | Number of Guests | |
---|---|---|
0 | PRT | 20977 |
1 | GBR | 9668 |
2 | FRA | 8468 |
3 | ESP | 6383 |
4 | DEU | 6067 |
px.choropleth(country_data,
locations=country_data['Country'],
color=country_data['Number of Guests'],
hover_name=country_data['Country'],
title='Home Country of Guests')
From the map above, we can conclude that majority of the hotel guests come from European countries.
data.shape
(119210, 32)
data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'arrival_date_day_of_month', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'meal', 'country', 'market_segment', 'distribution_channel', 'is_repeated_guest', 'previous_cancellations', 'previous_bookings_not_canceled', 'reserved_room_type', 'assigned_room_type', 'booking_changes', 'deposit_type', 'agent', 'company', 'days_in_waiting_list', 'customer_type', 'adr', 'required_car_parking_spaces', 'total_of_special_requests', 'reservation_status', 'reservation_status_date'], dtype='object')
data2=data[data['is_canceled']==0]
# ADR as average daily rate
plt.figure(figsize=(12,8))
sns.boxplot(x='reserved_room_type',y='adr',data=data2,hue='hotel')
plt.title('Price of Room Type Per Night',fontsize=25)
plt.xlabel('Room Type',fontsize=15)
plt.ylabel('Average Daily Rate in [EUR]',fontsize=15)
Text(0, 0.5, 'Average Daily Rate in [EUR]')
data_resort=resort[resort['is_canceled']==0]
data_city=city[city['is_canceled']==0]
resort_mean=data_resort.groupby('arrival_date_month')['adr'].mean().reset_index()
resort_mean
arrival_date_month | adr | |
---|---|---|
0 | April | 75.867816 |
1 | August | 181.205892 |
2 | December | 68.410104 |
3 | February | 54.147478 |
4 | January | 48.761125 |
5 | July | 150.122528 |
6 | June | 107.974850 |
7 | March | 57.056838 |
8 | May | 76.657558 |
9 | November | 48.706289 |
10 | October | 61.775449 |
11 | September | 96.416860 |
city_mean=data_city.groupby('arrival_date_month')['adr'].mean().reset_index()
city_mean
arrival_date_month | adr | |
---|---|---|
0 | April | 111.962267 |
1 | August | 118.674598 |
2 | December | 88.401855 |
3 | February | 86.520062 |
4 | January | 82.330983 |
5 | July | 115.818019 |
6 | June | 117.874360 |
7 | March | 90.658533 |
8 | May | 120.669827 |
9 | November | 86.946592 |
10 | October | 102.004672 |
11 | September | 112.776582 |
# Merging dataframe using same column
final_mean=resort_mean.merge(city_mean,on='arrival_date_month')
# Renaming columns
final_mean.columns=['Month','Price for Resort Hotel','Price for City Hotel']
final_mean.head()
Month | Price for Resort Hotel | Price for City Hotel | |
---|---|---|---|
0 | April | 75.867816 | 111.962267 |
1 | August | 181.205892 | 118.674598 |
2 | December | 68.410104 | 88.401855 |
3 | February | 54.147478 | 86.520062 |
4 | January | 48.761125 | 82.330983 |
# Sorting Month
!pip install sorted-months-weekdays
Requirement already satisfied: sorted-months-weekdays in c:\users\yunge\anaconda3\lib\site-packages (0.2)
!pip install sort-dataframeby-monthorweek
Requirement already satisfied: sort-dataframeby-monthorweek in c:\users\yunge\anaconda3\lib\site-packages (0.4)
import sort_dataframeby_monthorweek as sd
final_mean2=sd.Sort_Dataframeby_Month(final_mean,'Month')
final_mean2.head()
Month | Price for Resort Hotel | Price for City Hotel | |
---|---|---|---|
0 | January | 48.761125 | 82.330983 |
1 | February | 54.147478 | 86.520062 |
2 | March | 57.056838 | 90.658533 |
3 | April | 75.867816 | 111.962267 |
4 | May | 76.657558 | 120.669827 |
px.line(final_mean2,x='Month',y=['Price for Resort Hotel','Price for City Hotel'],title='Room Price Per Night by Month')
Price for resort hotel spiked during summer, while price for city hotel slightly rose during spring to fall seasons. Overall city hotel is more expensive than resort hotel during non-summer periods.
data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
# Depending on your pandas version and settings, you may get a different number. In our case, the result turned out to be 20. That is, if a dataframe has more than 20 columns, it will skip a few columns when displaying the dataframe.
pd.get_option("display.max_columns")
20
# Settings to display all columns
pd.set_option("display.max_columns", None)
data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'arrival_date_day_of_month', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'meal', 'country', 'market_segment', 'distribution_channel', 'is_repeated_guest', 'previous_cancellations', 'previous_bookings_not_canceled', 'reserved_room_type', 'assigned_room_type', 'booking_changes', 'deposit_type', 'agent', 'company', 'days_in_waiting_list', 'customer_type', 'adr', 'required_car_parking_spaces', 'total_of_special_requests', 'reservation_status', 'reservation_status_date'], dtype='object')
plt.figure(figsize=(15,10))
sns.boxplot(x='market_segment',y='stays_in_weekend_nights',data=data,hue='hotel')
<AxesSubplot:xlabel='market_segment', ylabel='stays_in_weekend_nights'>
plt.figure(figsize=(15,10))
sns.boxplot(x='market_segment',y='stays_in_week_nights',data=data,hue='hotel')
<AxesSubplot:xlabel='market_segment', ylabel='stays_in_week_nights'>
data['meal'].value_counts()
BB 92236 HB 14458 SC 10549 Undefined 1169 FB 798 Name: meal, dtype: int64
data['meal'].value_counts().index
Index(['BB', 'HB', 'SC', 'Undefined', 'FB'], dtype='object')
# 'Hole' to make a donut chart
px.pie(data,values=data['meal'].value_counts(),names=data['meal'].value_counts().index,hole=0.5,title='What is the Most Popular Meal?')
# Majority of the guests have no special requests
sns.countplot(data['total_of_special_requests'])
C:\Users\yunge\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
<AxesSubplot:xlabel='total_of_special_requests', ylabel='count'>
# Majority of the guests have no special requests
data['total_of_special_requests'].value_counts()
0 70201 1 33183 2 12952 3 2494 4 340 5 40 Name: total_of_special_requests, dtype: int64
data['total_of_special_requests'].count()
119210
# Creating a pivot table
pivot_data=data.groupby(['total_of_special_requests','is_canceled']).agg({'total_of_special_requests':'count'})
pivot_data
total_of_special_requests | ||
---|---|---|
total_of_special_requests | is_canceled | |
0 | 0 | 36667 |
1 | 33534 | |
1 | 0 | 25867 |
1 | 7316 | |
2 | 0 | 10086 |
1 | 2866 | |
3 | 0 | 2049 |
1 | 445 | |
4 | 0 | 304 |
1 | 36 | |
5 | 0 | 38 |
1 | 2 |
pivot_data.unstack()
total_of_special_requests | ||
---|---|---|
is_canceled | 0 | 1 |
total_of_special_requests | ||
0 | 36667 | 33534 |
1 | 25867 | 7316 |
2 | 10086 | 2866 |
3 | 2049 | 445 |
4 | 304 | 36 |
5 | 38 | 2 |
# Rename the column to count
pivot=pivot_data.rename(columns={'total_of_special_requests':'count'}).unstack()
pivot.head()
count | ||
---|---|---|
is_canceled | 0 | 1 |
total_of_special_requests | ||
0 | 36667 | 33534 |
1 | 25867 | 7316 |
2 | 10086 | 2866 |
3 | 2049 | 445 |
4 | 304 | 36 |
pivot.plot(kind='bar')
<AxesSubplot:xlabel='total_of_special_requests'>
From the bar chart above, we observed at 0 special request, cancellation is nearly as high as non-cancelled bookings. When there is 1 special request, number of cancellation dropped significantly. Indicating that the existence of special requests help to decrease cancellation.
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['Month','Number of Guests']
rush_resort
Month | Number of Guests | |
---|---|---|
0 | August | 3257 |
1 | July | 3137 |
2 | October | 2575 |
3 | March | 2571 |
4 | April | 2550 |
5 | May | 2535 |
6 | February | 2308 |
7 | September | 2102 |
8 | June | 2037 |
9 | December | 2014 |
10 | November | 1975 |
11 | January | 1866 |
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['Month','Number of Guests']
rush_city
Month | Number of Guests | |
---|---|---|
0 | August | 5367 |
1 | July | 4770 |
2 | May | 4568 |
3 | June | 4358 |
4 | October | 4326 |
5 | September | 4283 |
6 | March | 4049 |
7 | April | 4010 |
8 | February | 3051 |
9 | November | 2676 |
10 | December | 2377 |
11 | January | 2249 |
# Merging dataframe
final_rush=rush_resort.merge(rush_city,on='Month')
final_rush.columns=['Month','Number of Guest for Resort Hotel','Number of Guest for City Hotel']
final_rush
Month | Number of Guest for Resort Hotel | Number of Guest for City Hotel | |
---|---|---|---|
0 | August | 3257 | 5367 |
1 | July | 3137 | 4770 |
2 | October | 2575 | 4326 |
3 | March | 2571 | 4049 |
4 | April | 2550 | 4010 |
5 | May | 2535 | 4568 |
6 | February | 2308 | 3051 |
7 | September | 2102 | 4283 |
8 | June | 2037 | 4358 |
9 | December | 2014 | 2377 |
10 | November | 1975 | 2676 |
11 | January | 1866 | 2249 |
# Sort month in order
import sort_dataframeby_monthorweek as sd
final_rush2=sd.Sort_Dataframeby_Month(df=final_rush,monthcolumnname='Month')
final_rush2
Month | Number of Guest for Resort Hotel | Number of Guest for City Hotel | |
---|---|---|---|
0 | January | 1866 | 2249 |
1 | February | 2308 | 3051 |
2 | March | 2571 | 4049 |
3 | April | 2550 | 4010 |
4 | May | 2535 | 4568 |
5 | June | 2037 | 4358 |
6 | July | 3137 | 4770 |
7 | August | 3257 | 5367 |
8 | September | 2102 | 4283 |
9 | October | 2575 | 4326 |
10 | November | 1975 | 2676 |
11 | December | 2014 | 2377 |
px.line(data_frame=final_rush2,x='Month',y=['Number of Guest for Resort Hotel','Number of Guest for City Hotel'],title='Total Number of Guests Per Month')
Summer appears to be the most busy period of the year for both hotels. Interestingly, while city hotel is more expensive than resort hotel in most of the months, city hotel still beats resort hotel in terms of booking guests. Also, though resort hotel sees a huge price surge during summer, resort hotel still experiences a booking surge in summer.
filter2=data['is_canceled']==0
filter2.head()
0 True 1 True 2 True 3 True 4 True Name: is_canceled, dtype: bool
clean_data=data[filter2]
clean_data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
clean_data.shape
(75011, 32)
filter3=data['is_canceled']==1
clean_data2=data[filter3]
clean_data2.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | Resort Hotel | 1 | 85 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 82.0 | 0 | 1 | Canceled | 5/6/2015 |
9 | Resort Hotel | 1 | 75 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | HB | PRT | Offline TA/TO | TA/TO | 0 | 0 | 0 | D | D | 0 | No Deposit | 15.0 | 0.0 | 0 | Transient | 105.5 | 0 | 0 | Canceled | 4/22/2015 |
10 | Resort Hotel | 1 | 23 | 2015 | July | 27 | 1 | 0 | 4 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 123.0 | 0 | 0 | Canceled | 6/23/2015 |
27 | Resort Hotel | 1 | 60 | 2015 | July | 27 | 1 | 2 | 5 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 107.0 | 0 | 2 | Canceled | 5/11/2015 |
32 | Resort Hotel | 1 | 96 | 2015 | July | 27 | 1 | 2 | 8 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | E | E | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 108.3 | 0 | 2 | Canceled | 5/29/2015 |
clean_data2.shape
(44199, 32)
44199 + 75011
119210
clean_data['total_nights']=clean_data['stays_in_weekend_nights'] + clean_data['stays_in_week_nights']
<ipython-input-81-2d03ba504a81>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Ignore warnings
import warnings
from warnings import filterwarnings
filterwarnings('ignore')
clean_data['total_nights']=clean_data['stays_in_weekend_nights'] + clean_data['stays_in_week_nights']
stay=clean_data.groupby(['total_nights','hotel']).agg('count').reset_index()
stay.head()
total_nights | hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | City Hotel | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 | 251 |
1 | 0 | Resort Hotel | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 | 371 |
2 | 1 | City Hotel | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 | 9155 |
3 | 1 | Resort Hotel | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 | 6579 |
4 | 2 | City Hotel | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 | 10983 |
# Choose only the first 3 columns
stay=stay.iloc[:,0:3]
stay.head()
total_nights | hotel | Number of Stays | |
---|---|---|---|
0 | 0 | City Hotel | 251 |
1 | 0 | Resort Hotel | 371 |
2 | 1 | City Hotel | 9155 |
3 | 1 | Resort Hotel | 6579 |
4 | 2 | City Hotel | 10983 |
# Rename columns
stay=stay.rename(columns={'is_canceled': 'Number of Stays'})
stay.head()
total_nights | hotel | Number of Stays | |
---|---|---|---|
0 | 0 | City Hotel | 251 |
1 | 0 | Resort Hotel | 371 |
2 | 1 | City Hotel | 9155 |
3 | 1 | Resort Hotel | 6579 |
4 | 2 | City Hotel | 10983 |
plt.figure(figsize=(20,10))
sns.barplot(x='total_nights',y='Number of Stays',hue='hotel',data=stay)
<AxesSubplot:xlabel='total_nights', ylabel='Number of Stays'>
# Use hue order to prioritize hotels color order
plt.figure(figsize=(20,10))
sns.barplot(x='total_nights',y='Number of Stays',hue='hotel',hue_order=['Resort Hotel','City Hotel'],data=stay)
<AxesSubplot:xlabel='total_nights', ylabel='Number of Stays'>
From the chart above, we can conclude that for shorter stays from 1 to 5 nights, city hotel is preferred. However when number of stays surpass 5 nights, resort hotel is the preferred choice. Also, resort hotel has more bookings when it comes to 0 night.
# Bookings by market segment
clean_data.columns
Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'arrival_date_day_of_month', 'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children', 'babies', 'meal', 'country', 'market_segment', 'distribution_channel', 'is_repeated_guest', 'previous_cancellations', 'previous_bookings_not_canceled', 'reserved_room_type', 'assigned_room_type', 'booking_changes', 'deposit_type', 'agent', 'company', 'days_in_waiting_list', 'customer_type', 'adr', 'required_car_parking_spaces', 'total_of_special_requests', 'reservation_status', 'reservation_status_date', 'total_nights'], dtype='object')
clean_data['market_segment'].value_counts()
Online TA 35673 Offline TA/TO 15880 Direct 10648 Groups 7697 Corporate 4291 Complementary 639 Aviation 183 Name: market_segment, dtype: int64
clean_data['market_segment'].value_counts().index
Index(['Online TA', 'Offline TA/TO', 'Direct', 'Groups', 'Corporate', 'Complementary', 'Aviation'], dtype='object')
px.pie(clean_data,values=clean_data['market_segment'].value_counts(),names=clean_data['market_segment'].value_counts().index,title='Bookings by Market Segment',hole=0.5)
cancel=data[data['is_canceled']==1]
cancel.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | Resort Hotel | 1 | 85 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 82.0 | 0 | 1 | Canceled | 5/6/2015 |
9 | Resort Hotel | 1 | 75 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | HB | PRT | Offline TA/TO | TA/TO | 0 | 0 | 0 | D | D | 0 | No Deposit | 15.0 | 0.0 | 0 | Transient | 105.5 | 0 | 0 | Canceled | 4/22/2015 |
10 | Resort Hotel | 1 | 23 | 2015 | July | 27 | 1 | 0 | 4 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 123.0 | 0 | 0 | Canceled | 6/23/2015 |
27 | Resort Hotel | 1 | 60 | 2015 | July | 27 | 1 | 2 | 5 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 107.0 | 0 | 2 | Canceled | 5/11/2015 |
32 | Resort Hotel | 1 | 96 | 2015 | July | 27 | 1 | 2 | 8 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | E | E | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 108.3 | 0 | 2 | Canceled | 5/29/2015 |
# How many resort hotel bookings were cancelled?
len(cancel[cancel['hotel']=='Resort Hotel'])
11120
# How many city hotel bookings were cancelled?
len(cancel[cancel['hotel']=='City Hotel'])
33079
px.pie(values=[11120,33079],names=['Resort Hotel Cancellations','City Hotel Cancellation'],hole=0.5)
75% cancellations come from city hotel, while resort hotel accounted for 25% of the cancellations.