Commit 229c3b14 authored by Timon Erhart

rename functions

parent aed83ce3
%% Cell type:markdown id: tags:
# Workflow Example
> Shows how to execute the library's modules to complete an end-to-end workflow. At the same time, it serves as an integration test.
%% Cell type:code id: tags:
``` python
#| hide
# Jupyter Magic: Reload all modules every time before executing the Python code typed
%load_ext autoreload
%autoreload 2
```
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%% Cell type:markdown id: tags:
## Imports
%% Cell type:code id: tags:
``` python
from pathlib import Path
import geopandas as gpd
from datetime import datetime, timezone
# from osm_completeness.logger import logger, dummy_logger
# from osm_completeness.constants import (
# RAW_DATA_PBF_PATH_PREFIX,
# RAW_DATA_PATH_PREFIX,
# FEATURE_PATH_PREFIX,
# FILTER_DATA_GROUP_FILTER_PATH_PREFIX,
# RAW_DATA_GRID_PATH_PREFIX,
# PROCESSED_DATA_BALLTREE_PATH_PREFIX,
# DATETIME_FORMAT,
# )
from osm_completeness.persistence import (
save_gpd_parquet,
save_pd_parquet,
load_gpd_parquet,
load_pd_parquet,
create_timestamp,
)
from osm_completeness.osmdb import OsmDB, test_db
from osm_completeness.data_gathering import query_osmdb_by_filter
from osm_completeness.data_gathering import filter_pois_by_tag, load_tag_filter
from osm_completeness.data_preparation import cleaning_poi_count
from osm_completeness.feature_creation_osmdb import query_osmdb_count_by_grid
# from osm_completeness.feature_creation_balltree import create_ball_tree
from osm_completeness.feature_creation_count import poi_count_by_grid
from osm_completeness.feature_creation_distance import (
distance_feature, # TODO rename to create_distance_feature BUT be aware of the name conflict with the knn module
distance_feature_from_path,
save_distance_feature,
)
from osm_completeness.feature_creation.knn import create_distance_feature, create_and_train_knn, _get_centroid_coords_from_gdf
from osm_completeness.feature_transformation import (
power_transform_min_max_and_merge,
load_distance_feature_and_poi_count,
save_merged,
)
```
%% Output
2024-02-14T15:04:52 | ℹ️ INFO  | osm_completeness.logger:initialize_system:160 |
Logger system initialization complete. Starting the application...
2024-02-14T15:04:52 | ℹ️ INFO  | osm_completeness.logger:<module>:172 |
Logger has been initialized and ready to use.
%% Cell type:markdown id: tags:
## Workflow
![workflow](../doc/workflow_dataflow.svg)
%% Cell type:markdown id: tags:
### General Config
These should be the only inputs.
%% Cell type:code id: tags:
``` python
# TODO maybe use a data structure (e.g. named tuple, dataclass, dict)?
BBOX_RAPPERSWIL = [8.790092,47.206508,8.920555,47.255699] # Rapperswil including Relation Rapperswil-Jona
GROUP_FILTER_NAME = "shops"
GROUP_FILTER_PATH = Path("../data/test/rapperswil_full/9-filter/group-filters/shops/shops_2023-03-10T095340.json")
POI_FILTER_NAME = "shops"
POI_FILTER_PATH = Path("../data/test/rapperswil_full/9-filter/group-filters/shops/shops_2023-03-10T095340.json")
AREA_FILTER_PATH = Path("../data/test/rapperswil_full/9-filter/area-filters/Rapperswil_Urban_Area/Rapperswil_Urban_Area_2023-05-03T092040.json")
RUN_TIMESTAMP = create_timestamp()
print(RUN_TIMESTAMP) # Timestamp for this run (used for saving files)
assert GROUP_FILTER_PATH.exists(), f"Filter path {GROUP_FILTER_PATH} does not exist"
assert POI_FILTER_PATH.exists(), f"Filter path {POI_FILTER_PATH} does not exist"
assert AREA_FILTER_PATH.exists(), f"Filter path {AREA_FILTER_PATH} does not exist"
```
%% Output
2024-02-14T150455
2024-02-14T203101
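%% Cell type:markdown id: tags:
The TODO above could be resolved by bundling these inputs into a small container, for example a frozen dataclass. The sketch below only illustrates that idea; `RunConfig` and its field names are hypothetical and not part of the library.
%% Cell type:code id: tags:
``` python
from dataclasses import dataclass

# Hypothetical container for the run configuration defined above
@dataclass(frozen=True)
class RunConfig:
    bbox: list  # lon/lat [xmin, ymin, xmax, ymax]
    group_filter_path: Path
    poi_filter_path: Path
    area_filter_path: Path
    run_timestamp: str

config = RunConfig(
    bbox=BBOX_RAPPERSWIL,
    group_filter_path=GROUP_FILTER_PATH,
    poi_filter_path=POI_FILTER_PATH,
    area_filter_path=AREA_FILTER_PATH,
    run_timestamp=RUN_TIMESTAMP,
)
config
```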
%% Cell type:code id: tags:
``` python
# Ensure the OsmDB is available
assert test_db.test_connection()
```
%% Cell type:markdown id: tags:
### 101 Filter by Tags (Osm-DB, new)
%% Cell type:code id: tags:
``` python
tag_filter = load_tag_filter(POI_FILTER_PATH)
pois = filter_pois_by_tag(
    test_db,
    tag_filter,
    BBOX_RAPPERSWIL,
)
print(pois.shape)
pois.head(2)
```
%% Output
(216, 4)
osm_id name \
0 1464394035 Do it + Garden Migros
1 4828570638 Handbuchbinderei & Einrahmungen
tags geom
0 {'name': 'Do it + Garden Migros', 'shop': 'doi... POINT (8.85315 47.25407)
1 {'name': 'Handbuchbinderei & Einrahmungen', 's... POINT (8.85357 47.25213)
%% Cell type:markdown id: tags:
### 3005 Build Geofences
%% Cell type:code id: tags:
``` python
# TODO workaround until done: load from file
merged_geofence = gpd.read_parquet("../data/test/rapperswil_full/0-raw/grid/rapperswil_geofence_merged.parquet")
```
%% Cell type:code id: tags:
``` python
# Inspect
display(merged_geofence.head(2))
print("Shape: ", merged_geofence.shape)
merged_geofence['urban area'].value_counts()
```
%% Output
Shape: (1253, 3)
urban area
rapperswil 1253
Name: count, dtype: int64
%% Cell type:code id: tags:
``` python
# merged_geofence.explore()
```
%% Cell type:markdown id: tags:
### 804 Count by grid (OsmDB)
%% Cell type:code id: tags:
``` python
poi_count = query_osmdb_count_by_grid(
test_db,
GROUP_FILTER_PATH,
POI_FILTER_PATH,
BBOX_RAPPERSWIL,
)
print(poi_count.shape)
poi_count.head(2)
```
%% Output
(15825, 3)
grid_id geom count
0 2486800_1112000 POINT (5.97341 46.14995) 1
1 2488000_1118100 POINT (5.98748 46.20501) 1
%% Cell type:markdown id: tags:
### 200 Cleaning Count
%% Cell type:code id: tags:
``` python
# TODO really needed? implement into count_by_grid as e.g. "check_clean"
# cleaning_count_input = "osm_data_poi_count_filter_shops_example100.parquet"
# poi_count_cleaned = cleaning_poi_count(poi_count, dummy_logger)
poi_count_cleaned = poi_count
```
%% Cell type:markdown id: tags:
### 806 Distance Features
%% Cell type:code id: tags:
``` python
# raw_pois = load_gpd_parquet("../data/test/rapperswil_full/0-raw/raw_osm_data_filter_shops.parquet")
# poi_count = load_gpd_parquet("../data/test/rapperswil_full/0-raw/osm_data_poi_count_filter_shops_clean.parquet")
knn = create_and_train_knn(pois)
distance_feature = create_distance_feature(knn, poi_count_cleaned)
print(distance_feature.shape)
distance_feature.head(2)
```
%% Output
(15825, 24)
/home/erti/PROJECT_repos/OsmCompleteness/osm-completeness/osm_completeness/feature_creation/knn.py:21: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
cords = gdf.geometry.centroid.get_coordinates().to_numpy()
/home/erti/PROJECT_repos/OsmCompleteness/osm-completeness/osm_completeness/feature_creation/knn.py:21: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
cords = gdf.geometry.centroid.get_coordinates().to_numpy()
grid_id geom count distance_0 distance_1 \
0 2486800_1112000 POINT (5.97341 46.14995) 1 3.029405 3.031890
1 2488000_1118100 POINT (5.98748 46.20501) 1 2.996696 2.999077
distance_2 distance_3 distance_4 distance_5 distance_6 ... \
0 3.033666 3.036659 3.038469 3.038568 3.038722 ...
1 3.000996 3.004007 3.006174 3.006267 3.006421 ...
distance_11 distance_12 distance_13 distance_14 distance_15 \
0 3.039024 3.039038 3.039069 3.039159 3.039171
1 3.006735 3.006741 3.006770 3.006860 3.006867
distance_16 distance_17 distance_18 distance_19 \
0 3.039227 3.039239 3.039525 3.039545
1 3.006928 3.006934 3.007231 3.007242
geometry
0 POINT (5.97341 46.14995)
1 POINT (5.98748 46.20501)
[2 rows x 24 columns]
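%% Cell type:markdown id: tags:
The UserWarning above comes from computing centroids while the geometries are still in a geographic CRS. A possible remedy outside the library (a sketch only, not how `feature_creation.knn` currently works) is to re-project to a metric CRS such as LV95 before extracting centroid coordinates:
%% Cell type:code id: tags:
``` python
# Sketch: project to LV95 (EPSG:2056) for the centroid calculation,
# then convert the centroids back to WGS84 for downstream use.
projected = poi_count_cleaned.to_crs(epsg=2056)
centroids_wgs84 = projected.geometry.centroid.to_crs(epsg=4326)
coords = centroids_wgs84.get_coordinates().to_numpy()
coords[:2]
```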
%% Cell type:markdown id: tags:
### 901 Feature Transformation
TODO distance feature seems to make model worse, not better
%% Cell type:code id: tags:
``` python
# Load
poi_count_filepath = "../data/test/rapperswil_full/0-raw/osm_data_poi_count_filter_shops_clean.parquet"
distance_feature, raw_poi_count = load_distance_feature_and_poi_count(
distancefeature_filepath,
poi_count_filepath,
)
```
%% Output
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[18], line 4
1 # Load
2 poi_count_filepath = "../data/test/rapperswil_full/0-raw/osm_data_poi_count_filter_shops_clean.parquet"
3 distance_feature, raw_poi_count = load_distance_feature_and_poi_count(
----> 4 distancefeature_filepath,
5 poi_count_filepath,
6 )
NameError: name 'distancefeature_filepath' is not defined
%% Cell type:code id: tags:
``` python
# Transform & merge
features_transformed_merged = power_transform_min_max_and_merge(
distance_feature,
raw_poi_count,
)
```
%% Cell type:code id: tags:
``` python
features_transformed_merged.head(2)
```
%% Output
geometry \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 POLYGON ((8.79499 47.24211, 8.79631 47.24211, ...
0a891b56-93c6-5932-b106-0d5ad483084e POLYGON ((8.79499 47.24120, 8.79631 47.24120, ...
urban area shops shops distance_0 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 basel 0.0 0.636566
0a891b56-93c6-5932-b106-0d5ad483084e basel 0.0 0.672586
shops distance_1 shops distance_2 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.640241 0.616391
0a891b56-93c6-5932-b106-0d5ad483084e 0.663355 0.633558
shops distance_3 shops distance_4 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.589464 0.730488
0a891b56-93c6-5932-b106-0d5ad483084e 0.607533 0.724630
shops distance_5 shops distance_6 ... \
uuid ...
84a51433-51bc-5b29-8eba-c90f453a9a64 0.741115 0.744706 ...
0a891b56-93c6-5932-b106-0d5ad483084e 0.748109 0.738116 ...
shops distance_10 shops distance_11 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.748299 0.742585
0a891b56-93c6-5932-b106-0d5ad483084e 0.741430 0.735754
shops distance_12 shops distance_13 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.739127 0.73633
0a891b56-93c6-5932-b106-0d5ad483084e 0.732179 0.72941
shops distance_14 shops distance_15 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.726931 0.720853
0a891b56-93c6-5932-b106-0d5ad483084e 0.719802 0.713723
shops distance_16 shops distance_17 \
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.713804 0.710364
0a891b56-93c6-5932-b106-0d5ad483084e 0.706541 0.703190
shops distance_18 shops distance_19
uuid
84a51433-51bc-5b29-8eba-c90f453a9a64 0.707789 0.708378
0a891b56-93c6-5932-b106-0d5ad483084e 0.700606 0.701206
[2 rows x 23 columns]
%% Cell type:code id: tags:
``` python
# Save
timestamp: str = datetime.now(tz=timezone.utc).strftime(DATETIME_FORMAT)
filepath_features_transformed_merged = FEATURE_PATH_PREFIX / (distancefeature_filepath.stem + "_power_transformed_min_max.parquet")
save_merged(features_transformed_merged, filepath_features_transformed_merged)
```
%% Cell type:markdown id: tags:
### 1200 Model Development
%% Cell type:markdown id: tags:
### 1600 Predictions & Validation
......
%% Cell type:markdown id: tags:
 
# Filtering POIs from OsmDb
 
> Filter POIs from the OsmDb using tag filters
 
%% Cell type:code id: tags:
 
``` python
#| hide
# Jupyter Magic: Reload all modules every time before executing the Python code typed
%load_ext autoreload
%autoreload 2
```
 
%% Cell type:code id: tags:
 
``` python
#| default_exp data_gathering
```
 
%% Cell type:code id: tags:
 
``` python
#| export
 
import json
from pathlib import Path
from typing import Sequence, Union, Dict
 
import geopandas as gpd
from sqlalchemy import create_engine
from sqlalchemy.exc import OperationalError, ResourceClosedError
 
from osm_completeness.osmdb import DEFAULT_SRID, OsmDB, create_sql_bbox, test_db
```
 
%% Cell type:markdown id: tags:
 
## Private & Helpers
 
%% Cell type:code id: tags:
 
``` python
# | export
 
def load_tag_filter(path: Union[str, Path]) -> dict:
"""Load the POI filter from a JSON file."""
path = Path(path)
if not path.exists():
raise FileNotFoundError(
f"POI filter file {path} does not exist"
)
with open(path, "r") as f:
return json.load(f)
```
 
%% Cell type:code id: tags:
 
``` python
# | export
 
# TODO use create sql function from osmdb
 
def _create_sql_tag_filter(
tag_filter_dict: Dict, # dict containing the tag filters
) -> str:
sql_filter_strings = []
for key, values in tag_filter_dict.items():
single_quote = "'"
sql_values = (
f"({', '.join((f'{single_quote}{v}{single_quote}' for v in values))})"
)
sql_filter_strings.append(f"tags->'{key}' in {sql_values}")
 
return " OR ".join(sql_filter_strings)
```
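
%% Cell type:markdown id: tags:

A quick sanity check of the generated predicate (the tiny filter dict below is illustrative only, not one of the project's filter files):

%% Cell type:code id: tags:

``` python
# Hypothetical mini filter: two tag keys with a few values each
example_filter = {"shop": ["bakery", "butcher"], "amenity": ["cafe"]}
print(_create_sql_tag_filter(example_filter))
# -> tags->'shop' in ('bakery', 'butcher') OR tags->'amenity' in ('cafe')
```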
 
%% Cell type:code id: tags:
 
``` python
# | export
 
# TODO use create sql function from osmdb
 
def _create_sql_filter_query(
tag_filter_dict: Dict, # dict containing the tag filters
bbox: Sequence[float], # bbox lon/lat: [xmin, ymin, xmax, ymax]
table_name: str = "osm_point",
geom_col_name: str = "geom", # column that contains the postgis geometry
srid: int = DEFAULT_SRID,
) -> str:
sql_bbox = create_sql_bbox(bbox)
sql_filter = _create_sql_tag_filter(tag_filter_dict)
sql_query = f"SELECT * FROM {table_name} WHERE {geom_col_name} @ {sql_bbox} AND ({sql_filter});"
 
return sql_query
```
 
%% Cell type:code id: tags:
 
``` python
# | export
 
# TODO remove if no longer needed
 
def _convert_to_legacy_gdf(gdf: gpd.GeoDataFrame, group_filter_name) -> gpd.GeoDataFrame:
"""
Converts the result of the OsmDb group filter query into the same format as the PBF file parser.
"""
 
gdf_oldstyle = gpd.GeoDataFrame()
gdf_oldstyle['id'] = gdf['osm_id']
gdf_oldstyle['type'] = 'node'
gdf_oldstyle['filter name'] = group_filter_name # TODO check if really required; in future take from config
gdf_oldstyle['filter match'] = '' # TODO check if really required
# gdf_oldstyle['geometry'] = gdf['geom'].apply(lambda x: shapely.wkb.loads(x))
gdf_oldstyle['geometry'] = gdf['geom']
 
return gdf_oldstyle
```
 
%% Cell type:markdown id: tags:
 
## Public Interface
 
%% Cell type:code id: tags:
 
``` python
# | export
 
def filter_pois_by_tag(
osmdb: OsmDB,
tag_filter_dict: Dict, # dict containing the tag filters
bbox: Sequence[float], # lon/lat [xmin, ymin, xmax, ymax]
) -> gpd.GeoDataFrame:
"""
Query the OSM database in a certain area (bbox) for POIs using a tag filter.
"""
sql_query = _create_sql_filter_query(tag_filter_dict, bbox)
gdf = osmdb.execute_query_gdf(sql_query)
return gdf
```
 
%% Cell type:code id: tags:
 
``` python
# | export
 
# TODO remove if no longer needed
 
def query_osmdb_by_filter(
osmdb: OsmDB,
group_filter_file: Union[Path, str], # full path to json file containing the group filters
bbox: Sequence[float], # lon/lat [xmin, ymin, xmax, ymax]
) -> gpd.GeoDataFrame:
"""
Complete workflow to query the OSM database by a group filter.
"""
group_filter_file = Path(group_filter_file) # Make sure it's a Path object
sql_query = _create_sql_filter_query(group_filter_file, bbox)
gdf = osmdb.execute_query_gdf(sql_query)
gdf = _convert_to_legacy_gdf(gdf, group_filter_file.stem)
return gdf
```
 
%% Cell type:markdown id: tags:
 
## Test & Usage examples
 
%% Cell type:code id: tags:
 
``` python
# | hide
import folium
```
 
%% Cell type:code id: tags:
 
``` python
# Test specification
path = Path('../data/9-filter/group-filters/food_and_beverages/food_and_beverages_2023-03-10T095340.json')
group_filter_name = 'food_and_beverages'
bbox_zuerich = [8.43, 47.32, 8.65, 47.46]
```
 
%% Cell type:code id: tags:
 
``` python
# Load filter
 
tag_filter_dict = load_tag_filter(path)
```
 
%% Cell type:code id: tags:
 
``` python
# Test create full query
 
query = _create_sql_filter_query(
tag_filter_dict=tag_filter_dict,
bbox=bbox_zuerich,
)
print(query)
```
 
%% Output
 
SELECT * FROM osm_point WHERE geom @ ST_MakeEnvelope (8.43, 47.32, 8.65, 47.46, 4326) AND (tags->'ale supply' in ('limited') OR tags->'amenity' in ('bar', 'biergarten', 'cafe', 'canteen', 'fast food', 'food court', 'hookah lounge', 'osmica', 'pub', 'restaurant', 'social club') OR tags->'FR:amenity' in ('biergarten', 'fast food', 'hookah lounge') OR tags->'cuisine' in ('american', 'asian', 'bagel', 'barbecue', 'beef bowl', 'brazilian', 'brunch', 'bubble tea', 'burger', 'cake', 'catalan', 'chicken', 'chinese', 'coffee shop', 'couscous', 'crepe', 'curry', 'dessert', 'donut', 'dumpling', 'empanada', 'fish', 'fish and chips', 'french', 'friture', 'german', 'greek', 'grill', 'gyros', 'hotpot', 'hot dog', 'ice cream', 'indian', 'italian', 'japanese', 'jause', 'kebab', 'korean', 'mediterranean', 'mexican', 'noodle', 'pakistani', 'pancake', 'pasta', 'pastry', 'pie', 'pizza', 'ramen', 'regional', 'sandwich', 'seafood', 'soup', 'spanish', 'steak house', 'strudel', 'sushi', 'tapas', 'teahouse', 'tea shop', 'tex-mex', 'thai', 'turkish', 'vietnamese', 'waffle', 'wings') OR tags->'diet:meat' in ('yes') OR tags->'fast food' in ('cafeteria') OR tags->'microbrewery' in ('yes'));
 
%% Cell type:code id: tags:
 
``` python
# Test workflow
 
raw_osm_data = filter_pois_by_tag(
test_db,
tag_filter_dict=tag_filter_dict,
bbox=bbox_zuerich,
)
 
# Show map with data and bbox
 
m = raw_osm_data.explore()
xmin, ymin, xmax, ymax = bbox_zuerich
folium.Rectangle(
bounds=[[ymin, xmin], [ymax, xmax]],
# bounds=[[47.0445969, 7.5054728], [47.5733682, 8.8675476]],
color='red',
fill=False,
fill_color='red',
fill_opacity=0.2
).add_to(m)
 
m
```
 
%% Output
 
<folium.folium.Map at 0x7f97646c64f0>
 
%% Cell type:markdown id: tags:
 
## Some notes for osm2pgsql (importing the OsmDB)
 
#TODO (re)move if no longer needed
 
This is now done in an Airflow DAG.
 
run the docker:
- docker run --name osm-completeness-postgis -e POSTGRES_PASSWORD=postgres -p 5432:5432 -d postgis/postgis
- docker exec -ti osm-completeness-postgis psql -U postgres
 
remote connect:
- psql -h localhost -U postgres -p 5432
- \? help for psql
- \h <SQL COMMAND> help for SQL, e.g. \h CREATE
- \q quit
- \l list db's
- \d list tables
- \d <table> show structure
- \c <dbname>
 
default style: https://learnosm.org/files/default.style
 
create a db:
- createdb -h localhost -p 5432 -U postgres -W -e zuerich_osm_pbf
- OR inside psql: CREATE DATABASE my_new_database;
 
activate postgis and hstore:
- psql -h localhost -U postgres -p 5432 -d zuerich_osm_pbf
- (verify selected db: SELECT current_database();)
- CREATE EXTENSION IF NOT EXISTS postgis;
- (verify postgis: SELECT postgis_version();)
- CREATE EXTENSION hstore;
 
Import:
- osm2pgsql -c -d zuerich_osm_pbf -U postgres -H localhost -P 5432 -W -S data/0-raw/sql/default.style data/0-raw/pbf/Zuerich.osm.pbf
- osm2pgsql -c -d name_only -U postgres -H localhost -P 5432 -W -S data/0-raw/sql/name_only.style --hstore-all data/0-raw/pbf/Zuerich.osm.pbf
 
no pw prompt:
- PGPASSWORD=postgres <cmd>
......@@ -6,13 +6,17 @@ d = { 'settings': { 'branch': 'main',
'git_url': 'https://github.com/ifs/osm-completeness',
'lib_path': 'osm_completeness'},
'syms': { 'osm_completeness.constants': {},
'osm_completeness.data_gathering': { 'osm_completeness.data_gathering._build_sql_query_by_filter': ( 'data_gathering__query_osmdb_by_filter.html#_build_sql_query_by_filter',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering._convert_to_legacy_gdf': ( 'data_gathering__query_osmdb_by_filter.html#_convert_to_legacy_gdf',
'osm_completeness.data_gathering': { 'osm_completeness.data_gathering._convert_to_legacy_gdf': ( 'data_gathering__filter_pois_by_tag.html#_convert_to_legacy_gdf',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering.build_sql_group_filter': ( 'data_gathering__query_osmdb_by_filter.html#build_sql_group_filter',
'osm_completeness.data_gathering._create_sql_filter_query': ( 'data_gathering__filter_pois_by_tag.html#_create_sql_filter_query',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering._create_sql_tag_filter': ( 'data_gathering__filter_pois_by_tag.html#_create_sql_tag_filter',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering.query_osmdb_by_filter': ( 'data_gathering__query_osmdb_by_filter.html#query_osmdb_by_filter',
'osm_completeness.data_gathering.filter_pois_by_tag': ( 'data_gathering__filter_pois_by_tag.html#filter_pois_by_tag',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering.load_tag_filter': ( 'data_gathering__filter_pois_by_tag.html#load_tag_filter',
'osm_completeness/data_gathering.py'),
'osm_completeness.data_gathering.query_osmdb_by_filter': ( 'data_gathering__filter_pois_by_tag.html#query_osmdb_by_filter',
'osm_completeness/data_gathering.py')},
'osm_completeness.data_preparation': { 'osm_completeness.data_preparation._cleaning_count_all': ( 'data_preparation__cleaning_count.html#_cleaning_count_all',
'osm_completeness/data_preparation.py'),
......@@ -182,6 +186,18 @@ d = { 'settings': { 'branch': 'main',
'osm_completeness/mlflow_connection.py'),
'osm_completeness.mlflow_connection.MLflowConnectionManager._initialize_mlflow': ( 'helper_functions__mlflow_connection.html#mlflowconnectionmanager._initialize_mlflow',
'osm_completeness/mlflow_connection.py')},
'osm_completeness.mlflow_dummy': { 'osm_completeness.mlflow_dummy.MLflowDummy': ( 'helper_functions__mlflow_dummy.html#mlflowdummy',
'osm_completeness/mlflow_dummy.py'),
'osm_completeness.mlflow_dummy.MLflowDummy.__call__': ( 'helper_functions__mlflow_dummy.html#mlflowdummy.__call__',
'osm_completeness/mlflow_dummy.py'),
'osm_completeness.mlflow_dummy.MLflowDummy.__enter__': ( 'helper_functions__mlflow_dummy.html#mlflowdummy.__enter__',
'osm_completeness/mlflow_dummy.py'),
'osm_completeness.mlflow_dummy.MLflowDummy.__exit__': ( 'helper_functions__mlflow_dummy.html#mlflowdummy.__exit__',
'osm_completeness/mlflow_dummy.py'),
'osm_completeness.mlflow_dummy.MLflowDummy.__getattr__': ( 'helper_functions__mlflow_dummy.html#mlflowdummy.__getattr__',
'osm_completeness/mlflow_dummy.py'),
'osm_completeness.mlflow_dummy.MLflowDummy.__str__': ( 'helper_functions__mlflow_dummy.html#mlflowdummy.__str__',
'osm_completeness/mlflow_dummy.py')},
'osm_completeness.osm_manager': { 'osm_completeness.osm_manager.OSMManager': ( 'data_gathering__parse_pbf_by_filter.html#osmmanager',
'osm_completeness/osm_manager.py'),
'osm_completeness.osm_manager.OSMManager.__init__': ( 'data_gathering__parse_pbf_by_filter.html#osmmanager.__init__',
......@@ -247,8 +263,8 @@ d = { 'settings': { 'branch': 'main',
'osm_completeness/persistence.py')},
'osm_completeness.spatial': { 'osm_completeness.spatial.EPSG': ( 'helper_functions__spatial.html#epsg',
'osm_completeness/spatial.py'),
'osm_completeness.spatial.calculate_grid_length': ( 'helper_functions__spatial.html#calculate_grid_length',
'osm_completeness/spatial.py'),
'osm_completeness.spatial.calculate_grid_length_switzerland': ( 'helper_functions__spatial.html#calculate_grid_length_switzerland',
'osm_completeness/spatial.py'),
'osm_completeness.spatial.crs_convert': ( 'helper_functions__spatial.html#crs_convert',
'osm_completeness/spatial.py'),
'osm_completeness.spatial.distance_m': ( 'helper_functions__spatial.html#distance_m',
......
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb.
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/101_data_gathering__filter_pois_by_tag.ipynb.
# %% auto 0
__all__ = ['build_sql_group_filter', 'query_osmdb_by_filter']
__all__ = ['load_tag_filter', 'filter_pois_by_tag', 'query_osmdb_by_filter']
# %% ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb 4
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 3
import json
from pathlib import Path
from typing import Sequence, Union
from typing import Sequence, Union, Dict
import geopandas as gpd
from sqlalchemy import create_engine
......@@ -14,21 +14,24 @@ from sqlalchemy.exc import OperationalError, ResourceClosedError
from .osmdb import DEFAULT_SRID, OsmDB, create_sql_bbox, test_db
# %% ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb 6
def build_sql_group_filter(
group_filter_file: Union[
Path, str
], # path to json file containing the group filters
) -> str:
group_filter_file = Path(group_filter_file)
if not group_filter_file.exists():
raise FileNotFoundError(f"Group filter file {group_filter_file} does not exist")
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 5
def load_tag_filter(path: Union[str, Path]) -> dict:
"""Load the POI filter from a JSON file."""
path = Path(path)
if not path.exists():
raise FileNotFoundError(f"POI filter file {path} does not exist")
with open(path, "r") as f:
return json.load(f)
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 6
# TODO use create sql function from osmdb
with open(group_filter_file, "r") as f:
filter_dict = json.load(f)
def _create_sql_tag_filter(
tag_filter_dict: Dict, # path to json file containing the group filters
) -> str:
sql_filter_strings = []
for key, values in filter_dict.items():
for key, values in tag_filter_dict.items():
single_quote = "'"
sql_values = (
f"({', '.join((f'{single_quote}{v}{single_quote}' for v in values))})"
......@@ -37,26 +40,27 @@ def build_sql_group_filter(
return " OR ".join(sql_filter_strings)
# %% ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb 7
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 7
# TODO use create sql function from osmdb
def _build_sql_query_by_filter(
group_filter_file: Union[
Path, str
], # path to json file containing the group filters
def _create_sql_filter_query(
tag_filter_dict: Dict, # dict containing the tag filters
bbox: Sequence[float], # bbox lat/lon: [xmin, ymin, xmax, ymax]
table_name: str = "osm_point",
geom_col_name: str = "geom", # column that contains the postgis geometry
srid: int = DEFAULT_SRID,
) -> str:
sql_bbox = create_sql_bbox(bbox)
sql_filter = build_sql_group_filter(group_filter_file)
sql_filter = _create_sql_tag_filter(tag_filter_dict)
sql_query = f"SELECT * FROM {table_name} WHERE {geom_col_name} @ {sql_bbox} AND ({sql_filter});"
return sql_query
# %% ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb 8
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 8
# TODO remove if no longer needed
def _convert_to_legacy_gdf(
gdf: gpd.GeoDataFrame, group_filter_name
) -> gpd.GeoDataFrame:
......@@ -76,8 +80,21 @@ def _convert_to_legacy_gdf(
return gdf_oldstyle
# %% ../nbs/101_data_gathering__query_osmdb_by_filter.ipynb 10
# TODO docstring
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 10
def filter_pois_by_tag(
osmdb: OsmDB,
tag_filter_dict: Dict, # dict containing the tag filters
bbox: Sequence[float], # lon/lat [xmin, ymin, xmax, ymax]
) -> gpd.GeoDataFrame:
"""
Query the OSM database in a certain area (bbox) for POIs using a tag filter.
"""
sql_query = _create_sql_filter_query(tag_filter_dict, bbox)
gdf = osmdb.execute_query_gdf(sql_query)
return gdf
# %% ../nbs/101_data_gathering__filter_pois_by_tag.ipynb 11
# TODO remove if no longer needed
def query_osmdb_by_filter(
......@@ -91,7 +108,7 @@ def query_osmdb_by_filter(
complete workflow to query the OSM database by a group filter
"""
group_filter_file = Path(group_filter_file) # Make sure its a path object
sql_query = _build_sql_query_by_filter(group_filter_file, bbox)
sql_query = _create_sql_filter_query(group_filter_file, bbox)
gdf = osmdb.execute_query_gdf(sql_query)
gdf = _convert_to_legacy_gdf(gdf, group_filter_file.stem)
return gdf
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/3009_helper_functions__mlflow_dummy.ipynb.
# %% auto 0
__all__ = ['mlflow']
# %% ../nbs/3009_helper_functions__mlflow_dummy.ipynb 4
# TODO clean up import
# %% ../nbs/3009_helper_functions__mlflow_dummy.ipynb 6
class MLflowDummy:
"""
Dummy that can be called and is also a context manager.
It will always return itself, so that you can chain calls.
"""
def __getattr__(self, name):
return self
def __call__(self, *args, **kwargs):
return self
def __enter__(self):
return self
def __exit__(self, *args, **kwargs):
pass
def __str__(self) -> str:
return "MLflowDummy"
# Old approach
# class DummyContextManager():
# def __enter__(self):
# return self
# def __exit__(self, *args, **kwargs):
# pass
# class MLflowDummy():
# def __getattr__(self, name):
# if name=='start_run':
# return lambda *args, **kwargs: DummyContextManager()
# return lambda *args, **kwargs: None
# %% ../nbs/3009_helper_functions__mlflow_dummy.ipynb 8
# Singleton
mlflow = MLflowDummy()
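# Usage sketch (illustrative only, not part of the module): the dummy absorbs
# any mlflow-style call chain without side effects, e.g.
#     with mlflow.start_run():
#         mlflow.log_metric("rmse", 0.1)  # no-op, returns the dummy itself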
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/3010_helper_functions__spatial.ipynb.
# %% auto 0
__all__ = ['WGS84_CRS', 'LV95_CRS', 'WEB_CRS', 'EPSG', 'crs_convert', 'distance_m', 'calculate_grid_length']
__all__ = ['WGS84_CRS', 'LV95_CRS', 'WEB_CRS', 'EPSG', 'crs_convert', 'distance_m', 'calculate_grid_length_switzerland']
# %% ../nbs/3010_helper_functions__spatial.ipynb 4
import math
from typing import Sequence
from typing import Sequence, Tuple, Union
import geopandas as gpd
import pandas as pd
......@@ -23,9 +23,9 @@ def EPSG(crs: int) -> str:
return f"EPSG:{crs}"
# %% ../nbs/3010_helper_functions__spatial.ipynb 7
def crs_convert(y_lat, x_lon, from_crs: int, to_crs: int) -> Sequence[float]:
transformer = Transformer.from_crs(from_crs, to_crs)
return transformer.transform(y_lat, x_lon)
def crs_convert(x_lon, y_lat, from_crs: int, to_crs: int) -> Sequence[float]:
transformer = Transformer.from_crs(from_crs, to_crs, always_xy=True)
return transformer.transform(x_lon, y_lat)
# %% ../nbs/3010_helper_functions__spatial.ipynb 8
def distance_m(
......@@ -38,9 +38,30 @@ def distance_m(
return distance
# %% ../nbs/3010_helper_functions__spatial.ipynb 9
def calculate_grid_length(lat, lon, length_m: int = 100, grid_crs: int = WEB_CRS):
"""Get the length of 100m in Web Mercator at a given location (lat,lon in WGS84)"""
lv95_ = crs_convert(lat, lon, WGS84_CRS, LV95_CRS)
web_1 = crs_convert(lat, lon, WGS84_CRS, grid_crs)
web_2 = crs_convert(lv95_[0] + length_m, lv95_[1] + length_m, LV95_CRS, grid_crs)
return web_2[0] - web_1[0], web_2[1] - web_1[1]
# TODO this only works for Switzerland (LV95): Use pyproj Geod and fwd to calculate the distance instead
def calculate_grid_length_switzerland(
target_crs: int,
lat: float = 46.801111, # Geographical centre of Switzerland
lon: float = 8.226667, # Geographical centre of Switzerland
length_m: int = 100,
) -> Tuple[float, float]:
"""Get the length in target crs units at a given location (lat,lon in WGS84)"""
half_length_m = length_m / 2
# Convert the location to LV95
lv95_ = crs_convert(lon, lat, WGS84_CRS, LV95_CRS)
grid_lower_left = crs_convert(
lv95_[0] - half_length_m, lv95_[1] - half_length_m, LV95_CRS, target_crs
)
grid_upper_right = crs_convert(
lv95_[0] + half_length_m, lv95_[1] + half_length_m, LV95_CRS, target_crs
)
return (
grid_upper_right[0] - grid_lower_left[0],
grid_upper_right[1] - grid_lower_left[1],
)
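# Usage sketch (illustrative only): side lengths of a 100 m grid cell expressed
# in Web Mercator units at the centre of Switzerland:
#     dx, dy = calculate_grid_length_switzerland(target_crs=WEB_CRS)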