# Exploratory Data Analysis - Bicycle Rentals in the Chicago Area

## Setup

In [1]:
#!pip install folium
#!pip install geopandas

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import sweetviz
import folium
from folium import plugins
import geopandas

In [3]:
data = pd.read_csv('../../Bike_Study_Files/big_raw.csv')
data.shape

(5733451, 13)

In [4]:
# Convert 'started_at' and 'ended_at' to datetime objects
data['started_at'] = pd.to_datetime(data['started_at'])
data['ended_at'] = pd.to_datetime(data['ended_at'])

# Calculate ride duration in minutes
data['ride_duration'] = (data['ended_at'] - data['started_at']).dt.total_seconds() / 60

# Additional columns for temporal analysis
data['start_date'] = data['started_at'].dt.date
data['start_hour'] = data['started_at'].dt.hour
data['day_of_week'] = data['started_at'].dt.day_name()
data['week_of_year'] = data['started_at'].dt.isocalendar().week

# Preview the data with new columns
data[['ride_id', 'rideable_type', 'member_casual', 'started_at', 'ended_at', 'ride_duration', 'start_date', 'start_hour', 'day_of_week', 'week_of_year']].head()

Unnamed: 0,ride_id,rideable_type,member_casual,started_at,ended_at,ride_duration,start_date,start_hour,day_of_week,week_of_year
0,46F8167220E4431F,electric_bike,member,2021-12-07 15:06:07,2021-12-07 15:13:42,7.583333,2021-12-07,15,Tuesday,49
1,73A77762838B32FD,electric_bike,casual,2021-12-11 03:43:29,2021-12-11 04:10:23,26.9,2021-12-11,3,Saturday,49
2,4CF42452054F59C5,electric_bike,member,2021-12-15 23:10:28,2021-12-15 23:23:14,12.766667,2021-12-15,23,Wednesday,50
3,3278BA87BF698339,classic_bike,member,2021-12-26 16:16:10,2021-12-26 16:30:53,14.716667,2021-12-26,16,Sunday,51
4,6FF54232576A3B73,electric_bike,member,2021-12-30 11:31:05,2021-12-30 11:51:21,20.266667,2021-12-30,11,Thursday,52


## Fun with Mapping!!!

#### Map with 1000 Samples

In [5]:
geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=1000)
geo_loc.shape

(1000, 7)

In [6]:
geo_loc['start_lat']

2614150    41.884728
1398499    41.785046
5575848    41.895618
2531088    41.935733
4607971    41.940232
             ...    
4934673    41.892592
1588791    41.932225
1277913    41.857930
5421786    41.950000
4517928    41.932389
Name: start_lat, Length: 1000, dtype: float64

In [7]:
geo_loc.to_csv('geo_sample.csv')

In [8]:
geo_loc = pd.read_csv('geo_sample.csv')
geo_loc.shape

(1000, 8)

In [9]:
geo_loc.columns


Index(['Unnamed: 0', 'member_casual', 'rideable_type', 'ride_duration',
       'start_lat', 'start_lng', 'end_lat', 'end_lng'],
      dtype='object')

In [10]:
# Create a Folium map centered at a specific location
m = folium.Map(location=[geo_loc['start_lat'][1], geo_loc['start_lng'][1]])

# Add markers for each city
for i, row in geo_loc.iterrows():
    folium.Marker(
        location=[row['start_lat'], row['start_lng']]
    ).add_to(m)

# Display the map
m.save("geographical_sample.html")


In [11]:
m

#### Heatmap with 1,000 Samples

In [12]:
# Create point geometries
geometry = geopandas.points_from_xy(geo_loc['start_lng'], geo_loc['start_lat'])
geo_df = geopandas.GeoDataFrame(
    geo_loc[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
    geometry=geometry
)

geo_df.head()

Unnamed: 0,member_casual,rideable_type,ride_duration,start_lat,start_lng,end_lat,end_lng,geometry
0,member,classic_bike,27.366667,41.884728,-87.619521,41.918306,-87.636282,POINT (-87.61952 41.88473)
1,casual,electric_bike,3.866667,41.785046,-87.601135,41.791478,-87.599861,POINT (-87.60113 41.78505)
2,member,electric_bike,8.866667,41.895618,-87.677103,41.900363,-87.696704,POINT (-87.67710 41.89562)
3,member,classic_bike,6.5,41.935733,-87.663576,41.936497,-87.647539,POINT (-87.66358 41.93573)
4,member,classic_bike,19.616667,41.940232,-87.652944,41.978353,-87.659753,POINT (-87.65294 41.94023)


In [13]:
from folium import plugins

map = folium.Map(location=[geo_loc['start_lat'][1], geo_loc['start_lng'][1]])

heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df.geometry]

heat_data
plugins.HeatMap(heat_data).add_to(map)

map

#### Heatmap with 100,000 Samples

In [14]:
geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000)
geo_loc.shape

(100000, 7)

In [15]:
geo_loc.to_csv('100K_Samples.csv', index=False)

In [16]:
medium = pd.read_csv('100K_Samples.csv')
medium.shape

(100000, 7)

In [17]:
# Create point geometries
geometry = geopandas.points_from_xy(medium['start_lng'], medium['start_lat'])
geo_df_med = geopandas.GeoDataFrame(
    medium[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
    geometry=geometry
)

geo_df_med.head()

Unnamed: 0,member_casual,rideable_type,ride_duration,start_lat,start_lng,end_lat,end_lng,geometry
0,member,electric_bike,23.183333,41.937623,-87.644041,41.883602,-87.648627,POINT (-87.64404 41.93762)
1,casual,electric_bike,12.2,41.885332,-87.635797,41.906724,-87.63483,POINT (-87.63580 41.88533)
2,member,classic_bike,1.6,41.871262,-87.673688,41.871514,-87.669886,POINT (-87.67369 41.87126)
3,member,electric_bike,6.9,41.890893,-87.618009,41.891023,-87.63548,POINT (-87.61801 41.89089)
4,casual,electric_bike,7.733333,41.794329,-87.601558,41.799568,-87.594747,POINT (-87.60156 41.79433)


In [19]:
map2 = folium.Map(location=[medium['start_lat'][1], medium['start_lng'][1]])

heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df_med.geometry]

heat_data
#plugins.HeatMap(heat_data).add_to(map2)

folium.Circle(
    radius=4500,  # radius in meters
    location=[41.81, -87.70],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(map2)

folium.Circle(
    radius=4000,  # radius in meters
    location=[41.93, -87.80],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(map2)

folium.Circle(
    radius=5000,  # radius in meters
    location=[41.71, -87.61],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(map2)

map2

#### Heatmap with Station Markers

In [None]:
geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000)
geo_loc.shape

geo_loc.to_csv('100K_Samples_2.csv', index=False)

In [None]:
medium2 = pd.read_csv('100K_Samples_2.csv')
medium2.shape

In [None]:
medium2 = medium2.sample(n=1000)

In [None]:
# Create point geometries
geometry = geopandas.points_from_xy(medium2['start_lng'], medium2['start_lat'])
geo_df_med2 = geopandas.GeoDataFrame(
    medium2[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
    geometry=geometry
)

geo_df_med2.head()

In [None]:
pick_index = 6837

medium2['start_lat'][pick_index]

In [None]:
map3 = folium.Map(location=[medium2['start_lat'][pick_index], medium2['start_lng'][pick_index]])

# Add markers for each station
for i, row in medium2.iterrows():
    folium.Marker(
        location=[row['start_lat'], row['start_lng']],
        tooltip = row['start_station_name']
    ).add_to(map3)

heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df_med2.geometry]

heat_data
plugins.HeatMap(heat_data).add_to(map3)

map3

### Geographical Distribution: Member vs Casual

#### Sample Size 5,000

In [None]:
hundred_K = pd.read_csv('100K_Samples_2.csv')
hundred_K.shape

In [None]:
five_K = hundred_K.sample(n=5000)
five_K.shape

In [None]:
five_K_groups = five_K.groupby('member_casual')

In [None]:
casual = five_K_groups.get_group('casual')
members = five_K_groups.get_group('member')
print(casual.shape, members.shape)

In [None]:
start_loc = [41.88, -87.62]
label_loc = [41.88, -87.62]

combined_map = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=14)

# HeatMap for Casual
casual.reset_index(drop=True)
geometry_c = geopandas.points_from_xy(casual['start_lng'], casual['start_lat'])
geo_group_c = geopandas.GeoDataFrame(
        casual[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 
                'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
        geometry=geometry_c
)

heat_data_c = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_c.geometry]
plugins.HeatMap(heat_data_c).add_to(combined_map.m2)


# HeatMap for Members
members.reset_index(drop=True)
geometry_m = geopandas.points_from_xy(members['start_lng'], members['start_lat'])
geo_group_m = geopandas.GeoDataFrame(
        members[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 
                'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
        geometry=geometry_m
)

heat_data_m = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_m.geometry]
plugins.HeatMap(heat_data_m).add_to(combined_map.m1)


combined_map

In [None]:
one_K_casual = casual.sample(n=1000)
one_K_members = members.sample(n=1000)

In [None]:
start_loc = [41.88, -87.62]
label_loc = [41.88, -87.62]

combined_map_even = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=10)

# HeatMap for Casual
one_K_casual.reset_index(drop=True)
geometry_c = geopandas.points_from_xy(one_K_casual['start_lng'], one_K_casual['start_lat'])
geo_group_c = geopandas.GeoDataFrame(
       one_K_casual[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 
                'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
        geometry=geometry_c
)

heat_data_c = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_c.geometry]

plugins.HeatMap(heat_data_c).add_to(combined_map_even.m2)



# HeatMap for Members
one_K_members.reset_index(drop=True)
geometry_m = geopandas.points_from_xy(one_K_members['start_lng'], one_K_members['start_lat'])
geo_group_m = geopandas.GeoDataFrame(
        one_K_members[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 
                'start_lat', 'start_lng', 'end_lat', 'end_lng']], 
        geometry=geometry_m
)

heat_data_m = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_m.geometry]
plugins.HeatMap(heat_data_m).add_to(combined_map_even.m1)

folium.Circle(
    radius=4500,  # radius in meters
    location=[41.81, -87.70],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(combined_map_even)

folium.Circle(
    radius=4000,  # radius in meters
    location=[41.93, -87.80],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(combined_map_even)

folium.Circle(
    radius=5000,  # radius in meters
    location=[41.71, -87.61],  # latitude and longitude of the center
    color='black',  # color of the circle
    fill=False,
).add_to(combined_map_even)

combined_map_even