{
"cells": [
{
"cell_type": "markdown",
"id": "fb0f3289-8d54-4adc-b2e4-6e83ad1e43a9",
"metadata": {
"tags": []
},
"source": [
"# Exploratory Data Analysis - Bicycle Rentals in the Chicago Area"
]
},
{
"cell_type": "markdown",
"id": "04d8d599-4fd3-41c9-a970-d49c7f0d2d56",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "fc41529d-0d0b-4659-81f9-25dd0a113840",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:19:16.306365Z",
"start_time": "2023-11-30T19:19:16.293189Z"
},
"tags": []
},
"outputs": [],
"source": [
"#!pip install folium\n",
"#!pip install geopandas"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c069433a-99c2-4d54-af79-b0c47e728556",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:19:19.723246Z",
"start_time": "2023-11-30T19:19:16.309269Z"
},
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from scipy import stats\n",
"import sweetviz\n",
"import folium\n",
"from folium import plugins\n",
"import geopandas"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f4a67f0a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:19:46.330919Z",
"start_time": "2023-11-30T19:19:19.725815Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(5733451, 13)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_csv('../../Bike_Study_Files/big_raw.csv')\n",
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "583355e2",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:01.883145Z",
"start_time": "2023-11-30T19:19:46.347139Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ride_id | \n",
" rideable_type | \n",
" member_casual | \n",
" started_at | \n",
" ended_at | \n",
" ride_duration | \n",
" start_date | \n",
" start_hour | \n",
" day_of_week | \n",
" week_of_year | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 46F8167220E4431F | \n",
" electric_bike | \n",
" member | \n",
" 2021-12-07 15:06:07 | \n",
" 2021-12-07 15:13:42 | \n",
" 7.583333 | \n",
" 2021-12-07 | \n",
" 15 | \n",
" Tuesday | \n",
" 49 | \n",
"
\n",
" \n",
" 1 | \n",
" 73A77762838B32FD | \n",
" electric_bike | \n",
" casual | \n",
" 2021-12-11 03:43:29 | \n",
" 2021-12-11 04:10:23 | \n",
" 26.900000 | \n",
" 2021-12-11 | \n",
" 3 | \n",
" Saturday | \n",
" 49 | \n",
"
\n",
" \n",
" 2 | \n",
" 4CF42452054F59C5 | \n",
" electric_bike | \n",
" member | \n",
" 2021-12-15 23:10:28 | \n",
" 2021-12-15 23:23:14 | \n",
" 12.766667 | \n",
" 2021-12-15 | \n",
" 23 | \n",
" Wednesday | \n",
" 50 | \n",
"
\n",
" \n",
" 3 | \n",
" 3278BA87BF698339 | \n",
" classic_bike | \n",
" member | \n",
" 2021-12-26 16:16:10 | \n",
" 2021-12-26 16:30:53 | \n",
" 14.716667 | \n",
" 2021-12-26 | \n",
" 16 | \n",
" Sunday | \n",
" 51 | \n",
"
\n",
" \n",
" 4 | \n",
" 6FF54232576A3B73 | \n",
" electric_bike | \n",
" member | \n",
" 2021-12-30 11:31:05 | \n",
" 2021-12-30 11:51:21 | \n",
" 20.266667 | \n",
" 2021-12-30 | \n",
" 11 | \n",
" Thursday | \n",
" 52 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ride_id rideable_type member_casual started_at \\\n",
"0 46F8167220E4431F electric_bike member 2021-12-07 15:06:07 \n",
"1 73A77762838B32FD electric_bike casual 2021-12-11 03:43:29 \n",
"2 4CF42452054F59C5 electric_bike member 2021-12-15 23:10:28 \n",
"3 3278BA87BF698339 classic_bike member 2021-12-26 16:16:10 \n",
"4 6FF54232576A3B73 electric_bike member 2021-12-30 11:31:05 \n",
"\n",
" ended_at ride_duration start_date start_hour day_of_week \\\n",
"0 2021-12-07 15:13:42 7.583333 2021-12-07 15 Tuesday \n",
"1 2021-12-11 04:10:23 26.900000 2021-12-11 3 Saturday \n",
"2 2021-12-15 23:23:14 12.766667 2021-12-15 23 Wednesday \n",
"3 2021-12-26 16:30:53 14.716667 2021-12-26 16 Sunday \n",
"4 2021-12-30 11:51:21 20.266667 2021-12-30 11 Thursday \n",
"\n",
" week_of_year \n",
"0 49 \n",
"1 49 \n",
"2 50 \n",
"3 51 \n",
"4 52 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert 'started_at' and 'ended_at' to datetime objects\n",
"data['started_at'] = pd.to_datetime(data['started_at'])\n",
"data['ended_at'] = pd.to_datetime(data['ended_at'])\n",
"\n",
"# Calculate ride duration in minutes\n",
"data['ride_duration'] = (data['ended_at'] - data['started_at']).dt.total_seconds() / 60\n",
"\n",
"# Additional columns for temporal analysis\n",
"data['start_date'] = data['started_at'].dt.date\n",
"data['start_hour'] = data['started_at'].dt.hour\n",
"data['day_of_week'] = data['started_at'].dt.day_name()\n",
"data['week_of_year'] = data['started_at'].dt.isocalendar().week\n",
"\n",
"# Preview the data with new columns\n",
"data[['ride_id', 'rideable_type', 'member_casual', 'started_at', 'ended_at', 'ride_duration', 'start_date', 'start_hour', 'day_of_week', 'week_of_year']].head()"
]
},
{
"cell_type": "markdown",
"id": "90cad0f8",
"metadata": {},
"source": [
"## Fun with Mapping!!!"
]
},
{
"cell_type": "markdown",
"id": "30522ffd",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"#### Map with 1,000 Samples"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a494670d-aecc-4ff9-91e1-84592372bf1a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:02.400110Z",
"start_time": "2023-11-30T19:20:01.886155Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(1000, 7)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=1000)\n",
"geo_loc.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "89506f66-0f54-410d-9b29-63472748cdfe",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:02.431320Z",
"start_time": "2023-11-30T19:20:02.402119Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"2614150 41.884728\n",
"1398499 41.785046\n",
"5575848 41.895618\n",
"2531088 41.935733\n",
"4607971 41.940232\n",
" ... \n",
"4934673 41.892592\n",
"1588791 41.932225\n",
"1277913 41.857930\n",
"5421786 41.950000\n",
"4517928 41.932389\n",
"Name: start_lat, Length: 1000, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_loc['start_lat']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0316100c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:02.460977Z",
"start_time": "2023-11-30T19:20:02.434278Z"
}
},
"outputs": [],
"source": [
"geo_loc.to_csv('geo_sample.csv')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3f16917a",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:02.492071Z",
"start_time": "2023-11-30T19:20:02.464985Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(1000, 8)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_loc = pd.read_csv('geo_sample.csv')\n",
"geo_loc.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5cf27f8c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:02.508093Z",
"start_time": "2023-11-30T19:20:02.496070Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Unnamed: 0', 'member_casual', 'rideable_type', 'ride_duration',\n",
" 'start_lat', 'start_lng', 'end_lat', 'end_lng'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_loc.columns\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "00b616a2-4550-4f30-8bda-1f757eea43cd",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:03.023744Z",
"start_time": "2023-11-30T19:20:02.517099Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Create a Folium map centered at a specific location\n",
"m = folium.Map(location=[geo_loc['start_lat'][1], geo_loc['start_lng'][1]])\n",
"\n",
"# Add markers for each city\n",
"for i, row in geo_loc.iterrows():\n",
" folium.Marker(\n",
" location=[row['start_lat'], row['start_lng']]\n",
" ).add_to(m)\n",
"\n",
"# Display the map\n",
"m.save(\"geographical_sample.html\")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f6802bc6-ca33-451a-a59b-180c14923180",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:03.677983Z",
"start_time": "2023-11-30T19:20:03.028640Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"Make this Notebook Trusted to load map: File -> Trust Notebook
"
],
"text/plain": [
""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m"
]
},
{
"cell_type": "markdown",
"id": "c8be691f",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"#### Heatmap with 1,000 Samples"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "1438d5d5",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:03.724738Z",
"start_time": "2023-11-30T19:20:03.680983Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" member_casual | \n",
" rideable_type | \n",
" ride_duration | \n",
" start_lat | \n",
" start_lng | \n",
" end_lat | \n",
" end_lng | \n",
" geometry | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" member | \n",
" classic_bike | \n",
" 27.366667 | \n",
" 41.884728 | \n",
" -87.619521 | \n",
" 41.918306 | \n",
" -87.636282 | \n",
" POINT (-87.61952 41.88473) | \n",
"
\n",
" \n",
" 1 | \n",
" casual | \n",
" electric_bike | \n",
" 3.866667 | \n",
" 41.785046 | \n",
" -87.601135 | \n",
" 41.791478 | \n",
" -87.599861 | \n",
" POINT (-87.60113 41.78505) | \n",
"
\n",
" \n",
" 2 | \n",
" member | \n",
" electric_bike | \n",
" 8.866667 | \n",
" 41.895618 | \n",
" -87.677103 | \n",
" 41.900363 | \n",
" -87.696704 | \n",
" POINT (-87.67710 41.89562) | \n",
"
\n",
" \n",
" 3 | \n",
" member | \n",
" classic_bike | \n",
" 6.500000 | \n",
" 41.935733 | \n",
" -87.663576 | \n",
" 41.936497 | \n",
" -87.647539 | \n",
" POINT (-87.66358 41.93573) | \n",
"
\n",
" \n",
" 4 | \n",
" member | \n",
" classic_bike | \n",
" 19.616667 | \n",
" 41.940232 | \n",
" -87.652944 | \n",
" 41.978353 | \n",
" -87.659753 | \n",
" POINT (-87.65294 41.94023) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" member_casual rideable_type ride_duration start_lat start_lng \\\n",
"0 member classic_bike 27.366667 41.884728 -87.619521 \n",
"1 casual electric_bike 3.866667 41.785046 -87.601135 \n",
"2 member electric_bike 8.866667 41.895618 -87.677103 \n",
"3 member classic_bike 6.500000 41.935733 -87.663576 \n",
"4 member classic_bike 19.616667 41.940232 -87.652944 \n",
"\n",
" end_lat end_lng geometry \n",
"0 41.918306 -87.636282 POINT (-87.61952 41.88473) \n",
"1 41.791478 -87.599861 POINT (-87.60113 41.78505) \n",
"2 41.900363 -87.696704 POINT (-87.67710 41.89562) \n",
"3 41.936497 -87.647539 POINT (-87.66358 41.93573) \n",
"4 41.978353 -87.659753 POINT (-87.65294 41.94023) "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create point geometries\n",
"geometry = geopandas.points_from_xy(geo_loc['start_lng'], geo_loc['start_lat'])\n",
"geo_df = geopandas.GeoDataFrame(\n",
" geo_loc[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry\n",
")\n",
"\n",
"geo_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "37154e0c",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:03.833311Z",
"start_time": "2023-11-30T19:20:03.728744Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"Make this Notebook Trusted to load map: File -> Trust Notebook
"
],
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from folium import plugins\n",
"\n",
"map = folium.Map(location=[geo_loc['start_lat'][1], geo_loc['start_lng'][1]])\n",
"\n",
"heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df.geometry]\n",
"\n",
"heat_data\n",
"plugins.HeatMap(heat_data).add_to(map)\n",
"\n",
"map"
]
},
{
"cell_type": "markdown",
"id": "55c9890d",
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"#### Heatmap with 100,000 Samples"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "775faca6",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:04.444714Z",
"start_time": "2023-11-30T19:20:03.835311Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(100000, 7)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000)\n",
"geo_loc.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "d50df5e1",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:05.176902Z",
"start_time": "2023-11-30T19:20:04.447679Z"
}
},
"outputs": [],
"source": [
"geo_loc.to_csv('100K_Samples.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "a9a35619",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:05.348814Z",
"start_time": "2023-11-30T19:20:05.179876Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(100000, 7)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"medium = pd.read_csv('100K_Samples.csv')\n",
"medium.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "bba85fca",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:05.504576Z",
"start_time": "2023-11-30T19:20:05.351692Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" member_casual | \n",
" rideable_type | \n",
" ride_duration | \n",
" start_lat | \n",
" start_lng | \n",
" end_lat | \n",
" end_lng | \n",
" geometry | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" member | \n",
" electric_bike | \n",
" 23.183333 | \n",
" 41.937623 | \n",
" -87.644041 | \n",
" 41.883602 | \n",
" -87.648627 | \n",
" POINT (-87.64404 41.93762) | \n",
"
\n",
" \n",
" 1 | \n",
" casual | \n",
" electric_bike | \n",
" 12.200000 | \n",
" 41.885332 | \n",
" -87.635797 | \n",
" 41.906724 | \n",
" -87.634830 | \n",
" POINT (-87.63580 41.88533) | \n",
"
\n",
" \n",
" 2 | \n",
" member | \n",
" classic_bike | \n",
" 1.600000 | \n",
" 41.871262 | \n",
" -87.673688 | \n",
" 41.871514 | \n",
" -87.669886 | \n",
" POINT (-87.67369 41.87126) | \n",
"
\n",
" \n",
" 3 | \n",
" member | \n",
" electric_bike | \n",
" 6.900000 | \n",
" 41.890893 | \n",
" -87.618009 | \n",
" 41.891023 | \n",
" -87.635480 | \n",
" POINT (-87.61801 41.89089) | \n",
"
\n",
" \n",
" 4 | \n",
" casual | \n",
" electric_bike | \n",
" 7.733333 | \n",
" 41.794329 | \n",
" -87.601558 | \n",
" 41.799568 | \n",
" -87.594747 | \n",
" POINT (-87.60156 41.79433) | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" member_casual rideable_type ride_duration start_lat start_lng \\\n",
"0 member electric_bike 23.183333 41.937623 -87.644041 \n",
"1 casual electric_bike 12.200000 41.885332 -87.635797 \n",
"2 member classic_bike 1.600000 41.871262 -87.673688 \n",
"3 member electric_bike 6.900000 41.890893 -87.618009 \n",
"4 casual electric_bike 7.733333 41.794329 -87.601558 \n",
"\n",
" end_lat end_lng geometry \n",
"0 41.883602 -87.648627 POINT (-87.64404 41.93762) \n",
"1 41.906724 -87.634830 POINT (-87.63580 41.88533) \n",
"2 41.871514 -87.669886 POINT (-87.67369 41.87126) \n",
"3 41.891023 -87.635480 POINT (-87.61801 41.89089) \n",
"4 41.799568 -87.594747 POINT (-87.60156 41.79433) "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create point geometries\n",
"geometry = geopandas.points_from_xy(medium['start_lng'], medium['start_lat'])\n",
"geo_df_med = geopandas.GeoDataFrame(\n",
" medium[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry\n",
")\n",
"\n",
"geo_df_med.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4cd67665",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:37.522788Z",
"start_time": "2023-11-30T19:20:34.760165Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"Make this Notebook Trusted to load map: File -> Trust Notebook
"
],
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"map2 = folium.Map(location=[medium['start_lat'][1], medium['start_lng'][1]])\n",
"\n",
"heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df_med.geometry]\n",
"\n",
"heat_data\n",
"#plugins.HeatMap(heat_data).add_to(map2)\n",
"\n",
"folium.Circle(\n",
" radius=4500, # radius in meters\n",
" location=[41.81, -87.70], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(map2)\n",
"\n",
"folium.Circle(\n",
" radius=4000, # radius in meters\n",
" location=[41.93, -87.80], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(map2)\n",
"\n",
"folium.Circle(\n",
" radius=5000, # radius in meters\n",
" location=[41.71, -87.61], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(map2)\n",
"\n",
"map2"
]
},
{
"cell_type": "markdown",
"id": "f4a78884",
"metadata": {},
"source": [
"#### Heatmap with Station Markers"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3618133",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.553936Z",
"start_time": "2023-11-30T19:20:09.553936Z"
}
},
"outputs": [],
"source": [
"geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000)\n",
"geo_loc.shape\n",
"\n",
"geo_loc.to_csv('100K_Samples_2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf698120",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.556937Z",
"start_time": "2023-11-30T19:20:09.556937Z"
},
"tags": []
},
"outputs": [],
"source": [
"medium2 = pd.read_csv('100K_Samples_2.csv')\n",
"medium2.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22ebb974",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.559013Z",
"start_time": "2023-11-30T19:20:09.559013Z"
}
},
"outputs": [],
"source": [
"medium2 = medium2.sample(n=1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07f70c3b",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.560937Z",
"start_time": "2023-11-30T19:20:09.560937Z"
},
"tags": []
},
"outputs": [],
"source": [
"# Create point geometries\n",
"geometry = geopandas.points_from_xy(medium2['start_lng'], medium2['start_lat'])\n",
"geo_df_med2 = geopandas.GeoDataFrame(\n",
" medium2[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry\n",
")\n",
"\n",
"geo_df_med2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50c83236",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.561958Z",
"start_time": "2023-11-30T19:20:09.561958Z"
}
},
"outputs": [],
"source": [
"pick_index = 6837\n",
"\n",
"medium2['start_lat'][pick_index]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5c3cebe",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.563937Z",
"start_time": "2023-11-30T19:20:09.563937Z"
},
"tags": []
},
"outputs": [],
"source": [
"map3 = folium.Map(location=[medium2['start_lat'][pick_index], medium2['start_lng'][pick_index]])\n",
"\n",
"# Add markers for each station\n",
"for i, row in medium2.iterrows():\n",
" folium.Marker(\n",
" location=[row['start_lat'], row['start_lng']],\n",
" tooltip = row['start_station_name']\n",
" ).add_to(map3)\n",
"\n",
"heat_data = [[point.xy[1][0], point.xy[0][0]] for point in geo_df_med2.geometry]\n",
"\n",
"heat_data\n",
"plugins.HeatMap(heat_data).add_to(map3)\n",
"\n",
"map3"
]
},
{
"cell_type": "markdown",
"id": "e1de17a3",
"metadata": {},
"source": [
"### Geographical Distribution: Member vs Casual"
]
},
{
"cell_type": "markdown",
"id": "ea6ccca1",
"metadata": {},
"source": [
"#### Sample Size 5,000"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f26ee597",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.565941Z",
"start_time": "2023-11-30T19:20:09.565941Z"
}
},
"outputs": [],
"source": [
"hundred_K = pd.read_csv('100K_Samples_2.csv')\n",
"hundred_K.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf469096",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.567938Z",
"start_time": "2023-11-30T19:20:09.567938Z"
}
},
"outputs": [],
"source": [
"five_K = hundred_K.sample(n=5000)\n",
"five_K.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d435505",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.568937Z",
"start_time": "2023-11-30T19:20:09.568937Z"
}
},
"outputs": [],
"source": [
"five_K_groups = five_K.groupby('member_casual')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13274590",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.571937Z",
"start_time": "2023-11-30T19:20:09.571937Z"
},
"scrolled": true
},
"outputs": [],
"source": [
"casual = five_K_groups.get_group('casual')\n",
"members = five_K_groups.get_group('member')\n",
"print(casual.shape, members.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33f72fc9",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.572938Z",
"start_time": "2023-11-30T19:20:09.572938Z"
}
},
"outputs": [],
"source": [
"start_loc = [41.88, -87.62]\n",
"label_loc = [41.88, -87.62]\n",
"\n",
"combined_map = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=14)\n",
"\n",
"# HeatMap for Casual\n",
"casual.reset_index(drop=True)\n",
"geometry_c = geopandas.points_from_xy(casual['start_lng'], casual['start_lat'])\n",
"geo_group_c = geopandas.GeoDataFrame(\n",
" casual[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', \n",
" 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry_c\n",
")\n",
"\n",
"heat_data_c = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_c.geometry]\n",
"plugins.HeatMap(heat_data_c).add_to(combined_map.m2)\n",
"\n",
"\n",
"# HeatMap for Members\n",
"members.reset_index(drop=True)\n",
"geometry_m = geopandas.points_from_xy(members['start_lng'], members['start_lat'])\n",
"geo_group_m = geopandas.GeoDataFrame(\n",
" members[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', \n",
" 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry_m\n",
")\n",
"\n",
"heat_data_m = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_m.geometry]\n",
"plugins.HeatMap(heat_data_m).add_to(combined_map.m1)\n",
"\n",
"\n",
"combined_map"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4dd3e07",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.574971Z",
"start_time": "2023-11-30T19:20:09.574971Z"
}
},
"outputs": [],
"source": [
"one_K_casual = casual.sample(n=1000)\n",
"one_K_members = members.sample(n=1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be231133",
"metadata": {
"ExecuteTime": {
"end_time": "2023-11-30T19:20:09.577958Z",
"start_time": "2023-11-30T19:20:09.577958Z"
}
},
"outputs": [],
"source": [
"start_loc = [41.88, -87.62]\n",
"label_loc = [41.88, -87.62]\n",
"\n",
"combined_map_even = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=10)\n",
"\n",
"# HeatMap for Casual\n",
"one_K_casual.reset_index(drop=True)\n",
"geometry_c = geopandas.points_from_xy(one_K_casual['start_lng'], one_K_casual['start_lat'])\n",
"geo_group_c = geopandas.GeoDataFrame(\n",
" one_K_casual[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', \n",
" 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry_c\n",
")\n",
"\n",
"heat_data_c = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_c.geometry]\n",
"\n",
"plugins.HeatMap(heat_data_c).add_to(combined_map_even.m2)\n",
"\n",
"\n",
"\n",
"# HeatMap for Members\n",
"one_K_members.reset_index(drop=True)\n",
"geometry_m = geopandas.points_from_xy(one_K_members['start_lng'], one_K_members['start_lat'])\n",
"geo_group_m = geopandas.GeoDataFrame(\n",
" one_K_members[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', \n",
" 'start_lat', 'start_lng', 'end_lat', 'end_lng']], \n",
" geometry=geometry_m\n",
")\n",
"\n",
"heat_data_m = [[point.xy[1][0], point.xy[0][0]] for point in geo_group_m.geometry]\n",
"plugins.HeatMap(heat_data_m).add_to(combined_map_even.m1)\n",
"\n",
"folium.Circle(\n",
" radius=4500, # radius in meters\n",
" location=[41.81, -87.70], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(combined_map_even)\n",
"\n",
"folium.Circle(\n",
" radius=4000, # radius in meters\n",
" location=[41.93, -87.80], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(combined_map_even)\n",
"\n",
"folium.Circle(\n",
" radius=5000, # radius in meters\n",
" location=[41.71, -87.61], # latitude and longitude of the center\n",
" color='black', # color of the circle\n",
" fill=False,\n",
").add_to(combined_map_even)\n",
"\n",
"combined_map_even"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "337e4769",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "aab065a9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "298.594px"
},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}