In [1]:
import os
import sys
# Resolve ../geoparse/ against the current working directory and add it to the
# module search path. Because it is appended (not inserted at the front), any
# copy of the package that is already importable earlier on sys.path still wins.
# NOTE(review): presumably this exposes a local geoparse checkout — confirm the layout.
local_pkg_dir = os.path.abspath("../geoparse/")
sys.path.append(local_pkg_dir)
In [2]:
import warnings
import pandas as pd
from geoparse import SpatialIndex, plp
warnings.filterwarnings("ignore")
Read from CSV file
In [ ]:
In [5]:
# Remote CSV used throughout this notebook (per its file name: fatal crashes,
# Great Britain, 2023). Requires network access on every run.
CRASH_CSV_URL = "https://geoparse.io/tutorials/data/fatal_crash_great_britain_2023.csv"

df = pd.read_csv(CRASH_CSV_URL)

# Preview the first rows to check the columns that were parsed.
df.head()
Out[5]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | |
---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 |
In [6]:
# Number of rows loaded from the CSV.
df.shape[0]
Out[6]:
1522
In [7]:
# Pass the full frame to geoparse's `plp`; the saved output of this cell is an
# interactive map placeholder, so this presumably plots the rows on a map.
# NOTE(review): how `plp` picks the coordinate columns is not visible here — confirm.
plp(df)
Out[7]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [8]:
# Keep only the first five rows so the indexing demos below stay small.
# `.copy()` matters: `head()` returns a slice-derived frame, and the later cells
# add columns to it. Without an explicit copy, pandas (when Copy-on-Write is
# off) raises SettingWithCopyWarning on those assignments — currently hidden by
# the global warnings filter.
# NOTE: this rebinds `df`, so the full 1522-row frame is no longer reachable;
# re-run the read_csv cell to get it back.
df = df.head().copy()
df
Out[8]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | |
---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 |
In [10]:
%%time
# Encode each (latitude, longitude) pair as an H3 cell id at resolution 15 and
# store the ids in a new "h3" column.
# NOTE(review): the "p" prefix presumably means a parallel implementation — confirm.
df["h3"] = SpatialIndex.ppoint_cell(
    df["latitude"],
    df["longitude"],
    cell_type="h3",
    res=15,
)
df.head()
CPU times: user 68.5 ms, sys: 136 ms, total: 204 ms Wall time: 1.93 s
Out[10]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | |
---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 |
In [11]:
%%time
# Round-trip: turn the H3 ids back into coordinates and keep them in "lat"/"lon"
# so they can be compared with the original latitude/longitude columns.
# NOTE(review): presumably the returned point is the cell centre — confirm.
h3_points = SpatialIndex.pcell_point(df["h3"], cell_type="h3")
df[["lat", "lon"]] = h3_points
df.head()
CPU times: user 44 ms, sys: 121 ms, total: 165 ms Wall time: 1.93 s
Out[11]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | lat | lon | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 | 51.356552 | -0.097765 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 | 51.593698 | 0.022381 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 | 51.466687 | -0.011286 |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca | 51.671579 | -0.037540 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 | 51.447941 | 0.117280 |
In [12]:
# Largest absolute difference (in degrees) between the original coordinates and
# the ones recovered from the H3 ids.
# Uses vectorised pandas reductions instead of the Python builtin max(), and
# skipna=False so a failed decode (NaN) shows up as NaN rather than being
# silently skipped or depending on row order.
lat_err = (df["latitude"] - df["lat"]).abs().max(skipna=False)
lon_err = (df["longitude"] - df["lon"]).abs().max(skipna=False)
# float() keeps the displayed tuple as plain Python floats.
float(lat_err), float(lon_err)
Out[12]:
(3.387398464838043e-06, 6.242061589473158e-06)
In [ ]:
In [ ]:
In [13]:
%%time
# Encode each (latitude, longitude) pair as an S2 cell id at level 30 and store
# the ids in a new "s2" column.
df["s2"] = SpatialIndex.ppoint_cell(
    df["latitude"],
    df["longitude"],
    cell_type="s2",
    res=30,
)
df.head()
CPU times: user 49.1 ms, sys: 122 ms, total: 171 ms Wall time: 1.88 s
Out[13]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | lat | lon | s2 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 | 51.356552 | -0.097765 | 487607526cb67e75 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 | 51.593698 | 0.022381 | 47d8a749f4303a43 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 | 51.466687 | -0.011286 | 4876027b49457ecb |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca | 51.671579 | -0.037540 | 48761f9ac2a128c7 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 | 51.447941 | 0.117280 | 47d8ae95f533fc95 |
In [14]:
%%time
# Round-trip: turn the S2 ids back into coordinates.
# This overwrites the "lat"/"lon" columns written by the earlier H3 round-trip,
# so from here on they no longer correspond to the "h3" column.
s2_points = SpatialIndex.pcell_point(df["s2"], cell_type="s2")
df[["lat", "lon"]] = s2_points
df.head()
CPU times: user 43.5 ms, sys: 122 ms, total: 165 ms Wall time: 1.95 s
Out[14]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | lat | lon | s2 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 | 51.356551 | -0.097759 | 487607526cb67e75 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 | 51.593701 | 0.022379 | 47d8a749f4303a43 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 | 51.466689 | -0.011289 | 4876027b49457ecb |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca | 51.671577 | -0.037543 | 48761f9ac2a128c7 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 | 51.447944 | 0.117279 | 47d8ae95f533fc95 |
In [15]:
# Largest absolute difference (in degrees) between the original coordinates and
# the ones recovered from the S2 ids.
# Uses vectorised pandas reductions instead of the Python builtin max(), and
# skipna=False so a failed decode (NaN) shows up as NaN rather than being
# silently skipped or depending on row order.
lat_err = (df["latitude"] - df["lat"]).abs().max(skipna=False)
lon_err = (df["longitude"] - df["lon"]).abs().max(skipna=False)
# float() keeps the displayed tuple as plain Python floats.
float(lat_err), float(lon_err)
Out[15]:
(3.998487585477051e-08, 3.7182211115593944e-08)
In [ ]:
In [ ]:
In [23]:
%%time
# Encode each (latitude, longitude) pair as a geohash with res=10 and store it
# in a new "geohash" column (the saved output shows 10-character strings).
df["geohash"] = SpatialIndex.ppoint_cell(
    df["latitude"],
    df["longitude"],
    cell_type="geohash",
    res=10,
)
df.head()
CPU times: user 51.5 ms, sys: 124 ms, total: 175 ms Wall time: 2.16 s
Out[23]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | lat | lon | s2 | geohash | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 | 51.356551 | -0.097759 | 487607526cb67e75 | gcpujv0vd4 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 | 51.593701 | 0.022379 | 47d8a749f4303a43 | u10j882716 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 | 51.466689 | -0.011289 | 4876027b49457ecb | gcpuz9pz0y |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca | 51.671577 | -0.037543 | 48761f9ac2a128c7 | gcpvzns8v0 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 | 51.447944 | 0.117279 | 47d8ae95f533fc95 | u10hdtv3uz |
In [24]:
%%time
# Round-trip: turn the geohashes back into coordinates.
# This overwrites the "lat"/"lon" columns written by the previous round-trips,
# so from here on they correspond to the "geohash" column only.
geohash_points = SpatialIndex.pcell_point(df["geohash"], cell_type="geohash")
df[["lat", "lon"]] = geohash_points
df.head()
CPU times: user 40.1 ms, sys: 116 ms, total: 156 ms Wall time: 1.93 s
Out[24]:
date | time | latitude | longitude | number_of_vehicles | number_of_casualties | speed_limit | h3 | lat | lon | s2 | geohash | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 03/01/2023 | 19:12 | 51.356551 | -0.097759 | 1 | 1 | 30 | 8f194ac22370811 | 51.35655 | -0.0978 | 487607526cb67e75 | gcpujv0vd4 |
1 | 07/01/2023 | 10:05 | 51.593701 | 0.022379 | 1 | 1 | 30 | 8f194e68042a851 | 51.59370 | 0.0224 | 47d8a749f4303a43 | u10j882716 |
2 | 14/01/2023 | 16:15 | 51.466689 | -0.011289 | 1 | 1 | 20 | 8f194ad058a4c04 | 51.46669 | -0.0113 | 4876027b49457ecb | gcpuz9pz0y |
3 | 15/01/2023 | 19:51 | 51.671577 | -0.037543 | 1 | 1 | 30 | 8f195db6b2d6cca | 51.67158 | -0.0375 | 48761f9ac2a128c7 | gcpvzns8v0 |
4 | 16/01/2023 | 19:22 | 51.447944 | 0.117279 | 2 | 1 | 30 | 8f194e6deb25ac9 | 51.44794 | 0.1173 | 47d8ae95f533fc95 | u10hdtv3uz |
In [25]:
# Largest absolute difference (in degrees) between the original coordinates and
# the ones recovered from the geohashes.
# Uses vectorised pandas reductions instead of the Python builtin max(), and
# skipna=False so a failed decode (NaN) shows up as NaN rather than being
# silently skipped or depending on row order.
# NOTE(review): the saved output shows "lon" with only 4 decimals after this
# round-trip, which would explain a longitude error about 10x the latitude
# error — confirm whether the decoder rounds its result.
lat_err = (df["latitude"] - df["lat"]).abs().max(skipna=False)
lon_err = (df["longitude"] - df["lon"]).abs().max(skipna=False)
# float() keeps the displayed tuple as plain Python floats.
float(lat_err), float(lon_err)
Out[25]:
(3.999999997006398e-06, 4.300000000000137e-05)
In [ ]:
In [ ]: