{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b16c5a59",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-11-10T15:16:24.872611Z",
     "start_time": "2023-11-10T15:16:17.544623Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "import sweetviz\n",
    "import folium\n",
    "import geopandas\n",
    "from geopy.distance import geodesic\n",
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8f50b52d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-11-10T15:21:51.826550Z",
     "start_time": "2023-11-10T15:21:10.232818Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5733451, 13)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Location of the raw trip export, relative to this notebook.\n",
    "# Edit this one constant when the file lives somewhere else.\n",
    "DATA_PATH = '../../../Desktop/SamX/Bike_Study_Files/big_raw.csv'\n",
    "\n",
    "# Keep the full file under its own name so later cells can never confuse\n",
    "# it with the down-sampled frame `data` created further down.\n",
    "data_raw = pd.read_csv(DATA_PATH)\n",
    "data_raw.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "192bbebe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-11-10T15:53:09.128514Z",
     "start_time": "2023-11-10T15:53:09.000650Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(894, 902, 1001)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct station names seen as a trip start and as a trip end,\n",
    "# counted on the full file (data_raw), not on the sample.\n",
    "# NOTE(review): the stored output was produced at execution count 10, after\n",
    "# the sampling cell (count 3) had already run, so it may describe the\n",
    "# 50,000-row sample instead - re-run top to bottom to refresh it.\n",
    "unique_start_stations = data_raw['start_station_name'].nunique()\n",
    "unique_end_stations = data_raw['end_station_name'].nunique()\n",
    "\n",
    "# A station counts once whether it appears as a start, an end, or both.\n",
    "# Concatenating the two columns yields the same union as stacking them,\n",
    "# without reshaping into a MultiIndexed Series; nunique() skips missing names.\n",
    "unique_stations = pd.concat(\n",
    "    [data_raw['start_station_name'], data_raw['end_station_name']]\n",
    ").nunique()\n",
    "\n",
    "unique_start_stations, unique_end_stations, unique_stations"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2eecc8dd",
   "metadata": {},
   "source": [
    "#### Note:\n",
    "For the purposes of this analysis, I do not need to crunch through all 5.7 million rows of data.\n",
    "A sample of 50,000 is more than enough, and possibly too much for the heatmapping functions. I may need to tweak the parameters of the heatmap, or further restrict the sample size, later down the road."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "75b6b8f7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-11-10T15:25:07.162102Z",
     "start_time": "2023-11-10T15:25:05.757351Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(50000, 13)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "SAMPLE_SIZE = 50_000  # rows kept for the rest of the analysis\n",
    "RANDOM_SEED = 42      # fixed so the same rows are drawn on every run\n",
    "\n",
    "# Draw from data_raw instead of rebinding the raw frame, so re-running this\n",
    "# cell always returns the same rows in the same order.\n",
    "# data_raw stays in memory; `del data_raw` once the full file is no longer needed.\n",
    "data = data_raw.sample(n=SAMPLE_SIZE, replace=False, random_state=RANDOM_SEED)\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "25717c01",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-11-10T15:25:29.522553Z",
     "start_time": "2023-11-10T15:25:28.868730Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | ride_id | \n", "rideable_type | \n", "member_casual | \n", "started_at | \n", "ended_at | \n", "ride_duration | \n", "start_date | \n", "start_hour | \n", "day_of_week | \n", "week_of_year | \n", "
---|---|---|---|---|---|---|---|---|---|---|
3479634 | \n", "DF3F95B75B6DFBBC | \n", "electric_bike | \n", "member | \n", "2022-08-09 09:54:27 | \n", "2022-08-09 09:57:36 | \n", "3.150000 | \n", "2022-08-09 | \n", "9 | \n", "Tuesday | \n", "32 | \n", "
2954009 | \n", "E62A501C91C9283A | \n", "electric_bike | \n", "casual | \n", "2022-07-05 17:32:36 | \n", "2022-07-05 17:39:24 | \n", "6.800000 | \n", "2022-07-05 | \n", "17 | \n", "Tuesday | \n", "27 | \n", "
4233289 | \n", "2332C4F5E2EE3457 | \n", "classic_bike | \n", "member | \n", "2022-09-15 17:05:35 | \n", "2022-09-15 17:20:22 | \n", "14.783333 | \n", "2022-09-15 | \n", "17 | \n", "Thursday | \n", "37 | \n", "
4282455 | \n", "063065272EF72853 | \n", "classic_bike | \n", "member | \n", "2022-09-08 13:51:19 | \n", "2022-09-08 13:55:26 | \n", "4.116667 | \n", "2022-09-08 | \n", "13 | \n", "Thursday | \n", "36 | \n", "
2265302 | \n", "AFC7A810123AB8EB | \n", "classic_bike | \n", "casual | \n", "2022-06-19 11:36:21 | \n", "2022-06-19 12:39:10 | \n", "62.816667 | \n", "2022-06-19 | \n", "11 | \n", "Sunday | \n", "24 | \n", "