{ "cells": [
 { "cell_type": "markdown", "id": "fb0f3289-8d54-4adc-b2e4-6e83ad1e43a9", "metadata": { "tags": [] }, "source": [ "# Exploratory Data Analysis - Bicycle Rentals in the Chicago Area" ] },
 { "cell_type": "markdown", "id": "04d8d599-4fd3-41c9-a970-d49c7f0d2d56", "metadata": {}, "source": [ "## Setup" ] },
 { "cell_type": "code", "execution_count": 1, "id": "fc41529d-0d0b-4659-81f9-25dd0a113840", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:19:16.306365Z", "start_time": "2023-11-30T19:19:16.293189Z" }, "tags": [] }, "outputs": [], "source": [
  "#!pip install folium\n",
  "#!pip install geopandas" ] },
 { "cell_type": "code", "execution_count": 2, "id": "c069433a-99c2-4d54-af79-b0c47e728556", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:19:19.723246Z", "start_time": "2023-11-30T19:19:16.309269Z" }, "tags": [] }, "outputs": [], "source": [
  "import pandas as pd\n",
  "import numpy as np\n",
  "import seaborn as sns\n",
  "import matplotlib.pyplot as plt\n",
  "from scipy import stats\n",
  "import sweetviz\n",
  "import folium\n",
  "from folium import plugins\n",
  "import geopandas" ] },
 { "cell_type": "code", "execution_count": 3, "id": "f4a67f0a", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:19:46.330919Z", "start_time": "2023-11-30T19:19:19.725815Z" } }, "outputs": [ { "data": { "text/plain": [ "(5733451, 13)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Path of the raw rides CSV, relative to the notebook's working directory\n",
  "raw_csv_path = '../../Bike_Study_Files/big_raw.csv'\n",
  "\n",
  "# Load every ride into memory and report (rows, columns)\n",
  "data = pd.read_csv(raw_csv_path)\n",
  "data.shape" ] },
 { "cell_type": "code", "execution_count": 4, "id": "583355e2", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:01.883145Z", "start_time": "2023-11-30T19:19:46.347139Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ride_idrideable_typemember_casualstarted_atended_atride_durationstart_datestart_hourday_of_weekweek_of_year
046F8167220E4431Felectric_bikemember2021-12-07 15:06:072021-12-07 15:13:427.5833332021-12-0715Tuesday49
173A77762838B32FDelectric_bikecasual2021-12-11 03:43:292021-12-11 04:10:2326.9000002021-12-113Saturday49
24CF42452054F59C5electric_bikemember2021-12-15 23:10:282021-12-15 23:23:1412.7666672021-12-1523Wednesday50
33278BA87BF698339classic_bikemember2021-12-26 16:16:102021-12-26 16:30:5314.7166672021-12-2616Sunday51
46FF54232576A3B73electric_bikemember2021-12-30 11:31:052021-12-30 11:51:2120.2666672021-12-3011Thursday52
\n", "
" ], "text/plain": [ " ride_id rideable_type member_casual started_at \\\n", "0 46F8167220E4431F electric_bike member 2021-12-07 15:06:07 \n", "1 73A77762838B32FD electric_bike casual 2021-12-11 03:43:29 \n", "2 4CF42452054F59C5 electric_bike member 2021-12-15 23:10:28 \n", "3 3278BA87BF698339 classic_bike member 2021-12-26 16:16:10 \n", "4 6FF54232576A3B73 electric_bike member 2021-12-30 11:31:05 \n", "\n", " ended_at ride_duration start_date start_hour day_of_week \\\n", "0 2021-12-07 15:13:42 7.583333 2021-12-07 15 Tuesday \n", "1 2021-12-11 04:10:23 26.900000 2021-12-11 3 Saturday \n", "2 2021-12-15 23:23:14 12.766667 2021-12-15 23 Wednesday \n", "3 2021-12-26 16:30:53 14.716667 2021-12-26 16 Sunday \n", "4 2021-12-30 11:51:21 20.266667 2021-12-30 11 Thursday \n", "\n", " week_of_year \n", "0 49 \n", "1 49 \n", "2 50 \n", "3 51 \n", "4 52 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Parse both timestamp columns into pandas datetimes\n",
  "for ts_col in ('started_at', 'ended_at'):\n",
  "    data[ts_col] = pd.to_datetime(data[ts_col])\n",
  "\n",
  "# Ride length in minutes: elapsed seconds divided by 60\n",
  "data['ride_duration'] = data['ended_at'].sub(data['started_at']).dt.total_seconds().div(60)\n",
  "\n",
  "# Calendar fields taken from the ride's start timestamp\n",
  "started = data['started_at'].dt\n",
  "data['start_date'] = started.date\n",
  "data['start_hour'] = started.hour\n",
  "data['day_of_week'] = started.day_name()\n",
  "data['week_of_year'] = started.isocalendar().week\n",
  "\n",
  "# Preview the ride identifiers next to the derived columns\n",
  "preview_cols = [\n",
  "    'ride_id', 'rideable_type', 'member_casual', 'started_at', 'ended_at',\n",
  "    'ride_duration', 'start_date', 'start_hour', 'day_of_week', 'week_of_year',\n",
  "]\n",
  "data[preview_cols].head()" ] },
 { "cell_type": "markdown", "id": "90cad0f8", "metadata": {}, "source": [ "## Fun with Mapping!!!"
] },
 { "cell_type": "markdown", "id": "30522ffd", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ "#### Map with 1000 Samples" ] },
 { "cell_type": "code", "execution_count": 5, "id": "a494670d-aecc-4ff9-91e1-84592372bf1a", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:02.400110Z", "start_time": "2023-11-30T19:20:01.886155Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(1000, 7)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Draw 1,000 rides, keeping only the rider/bike attributes and the start/end coordinates.\n",
  "# random_state pins the draw so Restart & Run All reproduces the same sample and maps.\n",
  "geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=1000, random_state=42)\n",
  "geo_loc.shape" ] },
 { "cell_type": "code", "execution_count": 6, "id": "89506f66-0f54-410d-9b29-63472748cdfe", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:02.431320Z", "start_time": "2023-11-30T19:20:02.402119Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "2614150 41.884728\n", "1398499 41.785046\n", "5575848 41.895618\n", "2531088 41.935733\n", "4607971 41.940232\n", " ... 
\n", "4934673 41.892592\n", "1588791 41.932225\n", "1277913 41.857930\n", "5421786 41.950000\n", "4517928 41.932389\n", "Name: start_lat, Length: 1000, dtype: float64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_loc['start_lat']" ] },
 { "cell_type": "code", "execution_count": 7, "id": "0316100c", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:02.460977Z", "start_time": "2023-11-30T19:20:02.434278Z" } }, "outputs": [], "source": [
  "# Persist the sample; the row index is written too and comes back as 'Unnamed: 0'\n",
  "geo_loc.to_csv('geo_sample.csv')" ] },
 { "cell_type": "code", "execution_count": 8, "id": "3f16917a", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:02.492071Z", "start_time": "2023-11-30T19:20:02.464985Z" } }, "outputs": [ { "data": { "text/plain": [ "(1000, 8)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Reload the saved sample; rows are now labelled 0..999\n",
  "geo_loc = pd.read_csv('geo_sample.csv')\n",
  "geo_loc.shape" ] },
 { "cell_type": "code", "execution_count": 9, "id": "5cf27f8c", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:02.508093Z", "start_time": "2023-11-30T19:20:02.496070Z" } }, "outputs": [ { "data": { "text/plain": [ "Index(['Unnamed: 0', 'member_casual', 'rideable_type', 'ride_duration',\n", " 'start_lat', 'start_lng', 'end_lat', 'end_lng'],\n", " dtype='object')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geo_loc.columns\n" ] },
 { "cell_type": "code", "execution_count": 10, "id": "00b616a2-4550-4f30-8bda-1f757eea43cd", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:03.023744Z", "start_time": "2023-11-30T19:20:02.517099Z" }, "tags": [] }, "outputs": [], "source": [
  "# Center the map on the start coordinates of the sample row labelled 1\n",
  "center = [geo_loc['start_lat'][1], geo_loc['start_lng'][1]]\n",
  "m = folium.Map(location=center)\n",
  "\n",
  "# Drop one marker at the start coordinates of every sampled ride\n",
  "for lat, lng in zip(geo_loc['start_lat'], geo_loc['start_lng']):\n",
  "    folium.Marker(location=[lat, lng]).add_to(m)\n",
  "\n",
  "# Write the map to a standalone HTML file (it is displayed by the next cell)\n",
  "m.save(\"geographical_sample.html\")\n" ] },
 { "cell_type": "code", "execution_count": 11, "id": "f6802bc6-ca33-451a-a59b-180c14923180", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:03.677983Z", "start_time": "2023-11-30T19:20:03.028640Z" } }, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] },
 { "cell_type": "markdown", "id": "c8be691f", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ "#### Heatmap with 1,000 Samples" ] },
 { "cell_type": "code", "execution_count": 12, "id": "1438d5d5", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:03.724738Z", "start_time": "2023-11-30T19:20:03.680983Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
member_casualrideable_typeride_durationstart_latstart_lngend_latend_lnggeometry
0memberclassic_bike27.36666741.884728-87.61952141.918306-87.636282POINT (-87.61952 41.88473)
1casualelectric_bike3.86666741.785046-87.60113541.791478-87.599861POINT (-87.60113 41.78505)
2memberelectric_bike8.86666741.895618-87.67710341.900363-87.696704POINT (-87.67710 41.89562)
3memberclassic_bike6.50000041.935733-87.66357641.936497-87.647539POINT (-87.66358 41.93573)
4memberclassic_bike19.61666741.940232-87.65294441.978353-87.659753POINT (-87.65294 41.94023)
\n", "
" ], "text/plain": [ " member_casual rideable_type ride_duration start_lat start_lng \\\n", "0 member classic_bike 27.366667 41.884728 -87.619521 \n", "1 casual electric_bike 3.866667 41.785046 -87.601135 \n", "2 member electric_bike 8.866667 41.895618 -87.677103 \n", "3 member classic_bike 6.500000 41.935733 -87.663576 \n", "4 member classic_bike 19.616667 41.940232 -87.652944 \n", "\n", " end_lat end_lng geometry \n", "0 41.918306 -87.636282 POINT (-87.61952 41.88473) \n", "1 41.791478 -87.599861 POINT (-87.60113 41.78505) \n", "2 41.900363 -87.696704 POINT (-87.67710 41.89562) \n", "3 41.936497 -87.647539 POINT (-87.66358 41.93573) \n", "4 41.978353 -87.659753 POINT (-87.65294 41.94023) " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Ride attributes to keep alongside the geometry column\n",
  "ride_cols = [\n",
  "    'member_casual', 'rideable_type', 'ride_duration',\n",
  "    'start_lat', 'start_lng', 'end_lat', 'end_lng',\n",
  "]\n",
  "\n",
  "# One point per ride start: x is the longitude, y is the latitude\n",
  "geometry = geopandas.points_from_xy(x=geo_loc['start_lng'], y=geo_loc['start_lat'])\n",
  "geo_df = geopandas.GeoDataFrame(geo_loc[ride_cols], geometry=geometry)\n",
  "\n",
  "geo_df.head()" ] },
 { "cell_type": "code", "execution_count": 13, "id": "37154e0c", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:03.833311Z", "start_time": "2023-11-30T19:20:03.728744Z" } }, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Named heat_map_1k rather than `map` so the Python builtin map() is not shadowed.\n",
  "# The center is read from geo_df (built from the reloaded 1,000-row sample) because\n",
  "# geo_loc is rebound to a different sample further down the notebook.\n",
  "heat_map_1k = folium.Map(location=[geo_df['start_lat'][1], geo_df['start_lng'][1]])\n",
  "\n",
  "# HeatMap takes [latitude, longitude] pairs; a point's y is its latitude, x its longitude\n",
  "heat_data = [[point.y, point.x] for point in geo_df.geometry]\n",
  "plugins.HeatMap(heat_data).add_to(heat_map_1k)\n",
  "\n",
  "heat_map_1k" ] },
 { "cell_type": "markdown", "id": "55c9890d", "metadata": { "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ "#### Heatmap with 100,000 Samples" ] },
 { "cell_type": "code", "execution_count": 14, "id": "775faca6", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:04.444714Z", "start_time": "2023-11-30T19:20:03.835311Z" } }, "outputs": [ { "data": { "text/plain": [ "(100000, 7)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Draw 100,000 rides; random_state pins the draw so re-runs produce the same sample\n",
  "geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000, random_state=42)\n",
  "geo_loc.shape" ] },
 { "cell_type": "code", "execution_count": 15, "id": "d50df5e1", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:05.176902Z", "start_time": "2023-11-30T19:20:04.447679Z" } }, "outputs": [], "source": [
  "# Save without the row index so the file holds exactly the seven selected columns\n",
  "geo_loc.to_csv('100K_Samples.csv', index=False)" ] },
 { "cell_type": "code", "execution_count": 16, "id": "a9a35619", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:05.348814Z", "start_time": "2023-11-30T19:20:05.179876Z" } }, "outputs": [ { "data": { "text/plain": [ "(100000, 7)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Reload the saved sample; rows are labelled 0..99999 from here on\n",
  "medium = pd.read_csv('100K_Samples.csv')\n",
  "medium.shape" ] },
 { "cell_type": "code", "execution_count": 17, "id": "bba85fca", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:05.504576Z", "start_time": "2023-11-30T19:20:05.351692Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
member_casualrideable_typeride_durationstart_latstart_lngend_latend_lnggeometry
0memberelectric_bike23.18333341.937623-87.64404141.883602-87.648627POINT (-87.64404 41.93762)
1casualelectric_bike12.20000041.885332-87.63579741.906724-87.634830POINT (-87.63580 41.88533)
2memberclassic_bike1.60000041.871262-87.67368841.871514-87.669886POINT (-87.67369 41.87126)
3memberelectric_bike6.90000041.890893-87.61800941.891023-87.635480POINT (-87.61801 41.89089)
4casualelectric_bike7.73333341.794329-87.60155841.799568-87.594747POINT (-87.60156 41.79433)
\n", "
" ], "text/plain": [ " member_casual rideable_type ride_duration start_lat start_lng \\\n", "0 member electric_bike 23.183333 41.937623 -87.644041 \n", "1 casual electric_bike 12.200000 41.885332 -87.635797 \n", "2 member classic_bike 1.600000 41.871262 -87.673688 \n", "3 member electric_bike 6.900000 41.890893 -87.618009 \n", "4 casual electric_bike 7.733333 41.794329 -87.601558 \n", "\n", " end_lat end_lng geometry \n", "0 41.883602 -87.648627 POINT (-87.64404 41.93762) \n", "1 41.906724 -87.634830 POINT (-87.63580 41.88533) \n", "2 41.871514 -87.669886 POINT (-87.67369 41.87126) \n", "3 41.891023 -87.635480 POINT (-87.61801 41.89089) \n", "4 41.799568 -87.594747 POINT (-87.60156 41.79433) " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Ride attributes carried into the GeoDataFrame next to the geometry column\n",
  "medium_cols = [\n",
  "    'member_casual', 'rideable_type', 'ride_duration',\n",
  "    'start_lat', 'start_lng', 'end_lat', 'end_lng',\n",
  "]\n",
  "\n",
  "# Build one point per ride start (x = longitude, y = latitude)\n",
  "geometry = geopandas.points_from_xy(x=medium['start_lng'], y=medium['start_lat'])\n",
  "geo_df_med = geopandas.GeoDataFrame(medium[medium_cols], geometry=geometry)\n",
  "\n",
  "geo_df_med.head()" ] },
 { "cell_type": "code", "execution_count": 19, "id": "4cd67665", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:37.522788Z", "start_time": "2023-11-30T19:20:34.760165Z" } }, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Outlines drawn on the maps: (radius in meters, [latitude, longitude] of the center).\n",
  "# NOTE(review): presumably these mark areas of interest - TODO document why these three.\n",
  "REFERENCE_CIRCLES = [\n",
  "    (4500, [41.81, -87.70]),\n",
  "    (4000, [41.93, -87.80]),\n",
  "    (5000, [41.71, -87.61]),\n",
  "]\n",
  "\n",
  "\n",
  "def add_reference_circles(target):\n",
  "    \"\"\"Add one unfilled black folium.Circle per REFERENCE_CIRCLES entry to `target`.\n",
  "\n",
  "    `target` is whatever the circles should be added to via `add_to`\n",
  "    (a folium.Map or a plugins.DualMap in this notebook).\n",
  "    \"\"\"\n",
  "    for radius_m, center in REFERENCE_CIRCLES:\n",
  "        folium.Circle(radius=radius_m, location=center, color='black', fill=False).add_to(target)\n",
  "\n",
  "\n",
  "def start_heat_points(rides):\n",
  "    \"\"\"Return [[start_lat, start_lng], ...] for `rides`, skipping rows missing a coordinate.\n",
  "\n",
  "    The result is the list-of-pairs form passed to plugins.HeatMap.\n",
  "    \"\"\"\n",
  "    return rides[['start_lat', 'start_lng']].dropna().to_numpy().tolist()\n",
  "\n",
  "\n",
  "map2 = folium.Map(location=[medium['start_lat'][1], medium['start_lng'][1]])\n",
  "\n",
  "heat_data = start_heat_points(medium)\n",
  "# NOTE(review): the heat layer was already disabled in this cell, so the map shows\n",
  "# only the reference circles; uncomment the next line to draw the 100,000-point layer.\n",
  "#plugins.HeatMap(heat_data).add_to(map2)\n",
  "\n",
  "add_reference_circles(map2)\n",
  "\n",
  "map2" ] },
 { "cell_type": "markdown", "id": "f4a78884", "metadata": {}, "source": [ "#### Heatmap with Station Markers" ] },
 { "cell_type": "code", "execution_count": null, "id": "e3618133", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.553936Z", "start_time": "2023-11-30T19:20:09.553936Z" } }, "outputs": [], "source": [
  "# Same 100,000-ride draw as before, this time keeping the start station name.\n",
  "# random_state pins the draw so the saved CSV is identical on every run.\n",
  "geo_loc = data[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']].sample(n=100000, random_state=42)\n",
  "\n",
  "geo_loc.to_csv('100K_Samples_2.csv', index=False)\n",
  "geo_loc.shape" ] },
 { "cell_type": "code", "execution_count": null, "id": "cf698120", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.556937Z", "start_time": "2023-11-30T19:20:09.556937Z" }, "tags": [] }, "outputs": [], "source": [
  "medium2 = pd.read_csv('100K_Samples_2.csv')\n",
  "medium2.shape" ] },
 { "cell_type": "code", "execution_count": null, "id": "22ebb974", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.559013Z", "start_time": "2023-11-30T19:20:09.559013Z" } }, "outputs": [], "source": [
  "# Thin the data to 1,000 rides so the marker layer stays usable; seeded for repeatability\n",
  "medium2 = medium2.sample(n=1000, random_state=42)" ] },
 { "cell_type": "code", "execution_count": null, "id": "07f70c3b", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.560937Z", "start_time": "2023-11-30T19:20:09.560937Z" }, "tags": [] }, "outputs": [], "source": [
  "# Point geometry for each sampled ride start (x = longitude, y = latitude)\n",
  "geometry = geopandas.points_from_xy(medium2['start_lng'], medium2['start_lat'])\n",
  "geo_df_med2 = geopandas.GeoDataFrame(\n",
  "    medium2[['member_casual', 'rideable_type', 'ride_duration', 'start_station_name', 'start_lat', 'start_lng', 'end_lat', 'end_lng']],\n",
  "    geometry=geometry\n",
  ")\n",
  "\n",
  "geo_df_med2.head()" ] },
 { "cell_type": "code", "execution_count": null, "id": "50c83236", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.561958Z", "start_time": "2023-11-30T19:20:09.561958Z" } }, "outputs": [], "source": [
  "# Use the label of the first sampled row as the map center.\n",
  "# A hard-coded label (previously 6837) raises KeyError whenever that row is not\n",
  "# among the 1,000 rows drawn, because medium2 keeps its pre-sample row labels.\n",
  "pick_index = medium2.index[0]\n",
  "\n",
  "medium2.loc[pick_index, 'start_lat']" ] },
 { "cell_type": "code", "execution_count": null, "id": "f5c3cebe", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.563937Z", "start_time": "2023-11-30T19:20:09.563937Z" }, "tags": [] }, "outputs": [], "source": [
  "map3 = folium.Map(location=[medium2.loc[pick_index, 'start_lat'], medium2.loc[pick_index, 'start_lng']])\n",
  "\n",
  "# One marker per sampled ride start, with the start station name as hover text;\n",
  "# the tooltip is omitted when the station name is missing\n",
  "for lat, lng, station in zip(medium2['start_lat'], medium2['start_lng'], medium2['start_station_name']):\n",
  "    folium.Marker(\n",
  "        location=[lat, lng],\n",
  "        tooltip=station if pd.notna(station) else None\n",
  "    ).add_to(map3)\n",
  "\n",
  "heat_data = start_heat_points(medium2)\n",
  "plugins.HeatMap(heat_data).add_to(map3)\n",
  "\n",
  "map3" ] },
 { "cell_type": "markdown", "id": "e1de17a3", "metadata": {}, "source": [ "### Geographical Distribution: Member vs Casual" ] },
 { "cell_type": "markdown", "id": "ea6ccca1", "metadata": {}, "source": [ "#### Sample Size 5,000" ] },
 { "cell_type": "code", "execution_count": null, "id": "f26ee597", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.565941Z", "start_time": "2023-11-30T19:20:09.565941Z" } }, "outputs": [], "source": [
  "hundred_K = pd.read_csv('100K_Samples_2.csv')\n",
  "hundred_K.shape" ] },
 { "cell_type": "code", "execution_count": null, "id": "bf469096", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.567938Z", "start_time": "2023-11-30T19:20:09.567938Z" } }, "outputs": [], "source": [
  "# Seeded so the member/casual split below is the same on every run\n",
  "five_K = hundred_K.sample(n=5000, random_state=42)\n",
  "five_K.shape" ] },
 { "cell_type": "code", "execution_count": null, "id": "5d435505", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.568937Z", "start_time": "2023-11-30T19:20:09.568937Z" } }, "outputs": [], "source": [
  "five_K_groups = five_K.groupby('member_casual')" ] },
 { "cell_type": "code", "execution_count": null, "id": "13274590", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.571937Z", "start_time": "2023-11-30T19:20:09.571937Z" }, "scrolled": true }, "outputs": [], "source": [
  "casual = five_K_groups.get_group('casual')\n",
  "members = five_K_groups.get_group('member')\n",
  "print(casual.shape, members.shape)" ] },
 { "cell_type": "code", "execution_count": null, "id": "33f72fc9", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.572938Z", "start_time": "2023-11-30T19:20:09.572938Z" } }, "outputs": [], "source": [
  "start_loc = [41.88, -87.62]\n",
  "\n",
  "combined_map = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=14)\n",
  "\n",
  "# HeatMap for Casual riders goes on the m2 map\n",
  "heat_data_c = start_heat_points(casual)\n",
  "plugins.HeatMap(heat_data_c).add_to(combined_map.m2)\n",
  "\n",
  "# HeatMap for Members goes on the m1 map\n",
  "heat_data_m = start_heat_points(members)\n",
  "plugins.HeatMap(heat_data_m).add_to(combined_map.m1)\n",
  "\n",
  "combined_map" ] },
 { "cell_type": "code", "execution_count": null, "id": "f4dd3e07", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.574971Z", "start_time": "2023-11-30T19:20:09.574971Z" } }, "outputs": [], "source": [
  "# Equal-sized groups so the two heatmaps are directly comparable.\n",
  "# The size is capped by the smaller group: sampling more rows than a group holds\n",
  "# (without replacement) raises ValueError.\n",
  "even_n = min(1000, len(casual), len(members))\n",
  "one_K_casual = casual.sample(n=even_n, random_state=42)\n",
  "one_K_members = members.sample(n=even_n, random_state=42)" ] },
 { "cell_type": "code", "execution_count": null, "id": "be231133", "metadata": { "ExecuteTime": { "end_time": "2023-11-30T19:20:09.577958Z", "start_time": "2023-11-30T19:20:09.577958Z" } }, "outputs": [], "source": [
  "start_loc = [41.88, -87.62]\n",
  "\n",
  "combined_map_even = plugins.DualMap(location=start_loc, tiles='openstreetmap', zoom_start=10)\n",
  "\n",
  "# HeatMap for Casual riders goes on the m2 map\n",
  "heat_data_c = start_heat_points(one_K_casual)\n",
  "plugins.HeatMap(heat_data_c).add_to(combined_map_even.m2)\n",
  "\n",
  "# HeatMap for Members goes on the m1 map\n",
  "heat_data_m = start_heat_points(one_K_members)\n",
  "plugins.HeatMap(heat_data_m).add_to(combined_map_even.m1)\n",
  "\n",
  "# Same three outlines as on map2, added to the dual map itself\n",
  "add_reference_circles(combined_map_even)\n",
  "\n",
  "combined_map_even" ] },
 { "cell_type": "code", "execution_count": null, "id": "337e4769", "metadata": {}, "outputs": [], "source": [] },
 { "cell_type": "code", "execution_count": null, "id": "aab065a9", "metadata": {}, "outputs": [], "source": [] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents",
"toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "298.594px" }, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 5 }