From e7bda326f3da70e342243296855f52fecdf3cf63 Mon Sep 17 00:00:00 2001 From: gmatosferreira Date: Tue, 21 Apr 2026 16:26:41 +0100 Subject: [PATCH 1/3] Revert "#27 refactored vignette for OSM update, deprecating oauth as it does not work anymore and using user/password instead" This reverts commit 75d83fb623876ba688b2184be3493c745c127e9b. --- vignettes/osm_update.Rmd | 99 ++++++++++++++++++++++++++++++++-------- 1 file changed, 79 insertions(+), 20 deletions(-) diff --git a/vignettes/osm_update.Rmd b/vignettes/osm_update.Rmd index 11d6170..6c446ea 100644 --- a/vignettes/osm_update.Rmd +++ b/vignettes/osm_update.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( cache = TRUE, eval = FALSE, # Don't run code, just display! comment = "#>", - engine.path = list(python = "~/.virtualenvs/r-reticulate/bin/python3.12") + engine.path = list(python = '~/.virtualenvs/r-reticulate/bin/python3.12') ) ``` @@ -25,9 +25,20 @@ The association between OSM route relations id and the GTFS shapes_id returned b Python library [OsmApi](https://osmapi.metaodi.ch/osmapi/OsmApi.html) enables to perform this batch process using OpenStreetMaps API. This article, adapted from an [osmapi example script](https://github.com/metaodi/osmapi/blob/16aeb189c9ff8db607cc118842133b8cd7b60971/examples/oauth2.py), aims to document this procedure. -It can be run either at the development or live version of OSM, requiring credentials to be run on either of them. +It can be run either at the development or live version of OSM, requiring API credentials to be run on either of them, that should be stored as environment variables. + +> To get credentials for the development version, go to https://master.apis.dev.openstreetmap.org/oauth2/applications. +> They should be stored as `OSM_OAUTH_CLIENT_ID_DEV` and `OSM_OAUTH_CLIENT_SECRET_DEV`. + +> To get credentials for the live version, go to https://www.openstreetmap.org/oauth2/applications. 
+> They should be stored as `OSM_OAUTH_CLIENT_ID_PROD` and `OSM_OAUTH_CLIENT_SECRET_PROD`. + +> API registration parameters: +> Redirect uri: `urn:ietf:wg:oauth:2.0:oob` +> Permissions: `write_api`, `write_notes`, `read_prefs` + +The environment variables can be edited using `usethis::edit_r_environ()`, on R. -This script considers credentials to be stored at environment variables (`OSM_USER` and `OSM_PASSWORD`), to avoid hardcoding them in the script. When running it, either replace them manually by your credentials or, better, set them as environment variables, using `usethis::edit_r_environ()` on R, or by exporting them in the terminal. # Match GTFS shapes with OSM routes @@ -35,8 +46,8 @@ This example assumes that the steps described at [Matching shapes geometry](./os ```{r} write.csv(shapes_match_routes |> sf::st_drop_geometry() |> mutate( - distance_diff = round(distance_diff), - points_diff = round(points_diff) + distance_diff=round(distance_diff), + points_diff=round(points_diff) ), "osm_match.csv", row.names = FALSE) ``` @@ -44,12 +55,7 @@ Then, it should be loaded into the Python environment, filtering the matches by ```{python} import pandas as pd -area = "stcp" -gtfs_date = "2026-04-16" -run_date = "20260416" -base_folder = "../../GTFShift-web/scripts/osm_match" # "./" # "../../GTFShift-web/scripts/osm_match" - -df = pd.read_csv(f"{base_folder}/{area}/shapes_match_stcp_gtfs{gtfs_date}_run{run_date}.csv") # CSV with columns osm_id, shape_id and route_id (optional) +df = pd.read_csv("osm_match.csv") # CSV with columns osm_id, shape_id and route_id (optional) len(df) df = df[(df['distance_diff'] < 1000) & (df['points_diff'] < 500)] # Filter to only update those that meet threshold len(df) @@ -73,7 +79,7 @@ import logging If there is any dependency missing, just install it with `reticulate::py_install()`. 
```{r} # library(reticulate) -# py_install("dotenv") +# py_install("geopy") ``` Load environment variables @@ -83,12 +89,10 @@ load_dotenv(find_dotenv()) Set up logging ```{python} -# If previous logger definition exists, remove it to avoid duplicated logs -if 'logger' in globals(): - logger.handlers.clear() logger = logging.getLogger('my_logger') logger.setLevel(logging.DEBUG) # Set the base logging level -log_file_path = f'{base_folder}/{area}/shapes_match_{area}_gtfs{gtfs_date}_run{run_date}.osm.log' +# Use a generic log file path; replace {city} and {date} as needed +log_file_path = '../releases/{version}/shapes_match_{city}_gtfs{date}_run{date}.osm.log' file_handler = logging.FileHandler(log_file_path) file_handler.setLevel(logging.DEBUG) console_handler = logging.StreamHandler() @@ -104,12 +108,67 @@ logger.addHandler(console_handler) # Connect to OSM -Open the API connection +Either connect to the live OSM version... +```{python} +client_id = os.getenv("OSM_OAUTH_CLIENT_ID_PROD") # To edit env vars on RStudio, use usethis::edit_r_environ() +client_secret = os.getenv("OSM_OAUTH_CLIENT_SECRET_PROD") +authorization_base_url = "https://www.openstreetmap.org/oauth2/authorize" +token_url = "https://www.openstreetmap.org/oauth2/token" +api_url = "https://api.openstreetmap.org" +``` + +... or the dev one. It is recommended to try to apply the changes first at the dev version, to avoid corrupting the live OSM data. +```{python} +client_id = os.getenv("OSM_OAUTH_CLIENT_ID_DEV") # To edit env vars on RStudio, use usethis::edit_r_environ() +client_secret = os.getenv("OSM_OAUTH_CLIENT_SECRET_DEV") +authorization_base_url = "https://master.apis.dev.openstreetmap.org/oauth2/authorize" +token_url = "https://master.apis.dev.openstreetmap.org/oauth2/token" +api_url = "https://api06.dev.openstreetmap.org" +``` + +If no window is opened, authenticate by clicking on the url that is printed in the console. 
```{python} -api_url = "https://api.openstreetmap.org" # Live OSM -# api_url = "https://api06.dev.openstreetmap.org" # Use this for testing on the development OSM instance, but make sure to change credentials accordingly +redirect_uri = "urn:ietf:wg:oauth:2.0:oob" +oauth2client = OAuth2Client( + token_endpoint=token_url, + authorization_endpoint=authorization_base_url, + redirect_uri=redirect_uri, + client_id=client_id, + client_secret=client_secret, + auth_method="client_secret_post", + code_challenge_method=None +) + +# Open OSM website to authorize user using the write_api and write_notes scope +scope = ["write_api", "write_notes", "read_prefs"] +az_request = oauth2client.authorization_request(scope=scope) +print(f"Authorize user using this URL: {az_request.uri}") +webbrowser.open(az_request.uri) # If on studio web, this might not work. If so, just open the link printed in the command before and jump to next line. +``` -api = osmapi.OsmApi(api=api_url, username=os.getenv("OSM_USER"), password=os.getenv("OSM_PASSWORD")) +After authenticating, you will be presented with an authorization code. Copy it and set `auth_code` to its value. 
+```{python} +auth_code = "" # Replace with your authorization code +auth_code +auth = OAuth2AuthorizationCodeAuth( + oauth2client, + auth_code, + redirect_uri=redirect_uri, +) +oauth_session = requests.Session() +oauth_session.auth = auth +``` + +Test authentication (should return 200 status code) +```{python} +resp = oauth_session.get(f"{api_url}/api/0.6/user/details") +print(resp.status_code) # 200 is expected +# print(resp.text) # This can help debugging if something goes wrong +``` + +Finally, open the API connection +```{python} +api = osmapi.OsmApi(api=api_url, session=oauth_session) ``` From 13d700548fa026bbf316f947b2a09a1243d75066 Mon Sep 17 00:00:00 2001 From: gmatosferreira Date: Tue, 21 Apr 2026 16:56:57 +0100 Subject: [PATCH 2/3] #73 osm update fix and global script improvements --- vignettes/osm_update.Rmd | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/vignettes/osm_update.Rmd b/vignettes/osm_update.Rmd index 6c446ea..db7db56 100644 --- a/vignettes/osm_update.Rmd +++ b/vignettes/osm_update.Rmd @@ -15,7 +15,7 @@ knitr::opts_chunk$set( cache = TRUE, eval = FALSE, # Don't run code, just display! 
comment = "#>", - engine.path = list(python = '~/.virtualenvs/r-reticulate/bin/python3.12') + engine.path = list(python = "~/.virtualenvs/r-reticulate/bin/python3.12") ) ``` @@ -46,8 +46,8 @@ This example assumes that the steps described at [Matching shapes geometry](./os ```{r} write.csv(shapes_match_routes |> sf::st_drop_geometry() |> mutate( - distance_diff=round(distance_diff), - points_diff=round(points_diff) + distance_diff = round(distance_diff), + points_diff = round(points_diff) ), "osm_match.csv", row.names = FALSE) ``` @@ -55,7 +55,12 @@ Then, it should be loaded into the Python environment, filtering the matches by ```{python} import pandas as pd -df = pd.read_csv("osm_match.csv") # CSV with columns osm_id, shape_id and route_id (optional) +area = "stcp" +gtfs_date = "2026-04-16" +run_date = "20260416" +base_folder = "./" + +df = pd.read_csv(f"{base_folder}/{area}/shapes_match_{area}_gtfs{gtfs_date}_run{run_date}.csv") # CSV with columns osm_id, shape_id and route_id (optional) len(df) df = df[(df['distance_diff'] < 1000) & (df['points_diff'] < 500)] # Filter to only update those that meet threshold len(df) @@ -74,6 +79,9 @@ from dotenv import load_dotenv, find_dotenv import os import pandas as pd import logging + +# Patch requests default User-Agent to avoid being blocked by OSM (see https://github.com/Zverik/cli-oauth2/issues/7) +requests.utils.default_user_agent = lambda: "GTFShift/0.8.3 (ushift@tecnico.ulisboa.pt)" ``` If there is any dependency missing, just install it with `reticulate::py_install()`. 
@@ -89,10 +97,12 @@ load_dotenv(find_dotenv()) Set up logging ```{python} +# If previous logger definition exists, remove it to avoid duplicated logs +if 'logger' in globals(): + logger.handlers.clear() logger = logging.getLogger('my_logger') logger.setLevel(logging.DEBUG) # Set the base logging level -# Use a generic log file path; replace {city} and {date} as needed -log_file_path = '../releases/{version}/shapes_match_{city}_gtfs{date}_run{date}.osm.log' +log_file_path = f'{base_folder}/{area}/shapes_match_{area}_gtfs{gtfs_date}_run{run_date}.osm.log' file_handler = logging.FileHandler(log_file_path) file_handler.setLevel(logging.DEBUG) console_handler = logging.StreamHandler() @@ -179,7 +189,7 @@ Attention! The script below will open a changeset and update OSM data! Make sure ```{python} # Create change set, updating relations with tag gtfs:shape_id # The changeset comment can be customized to better describe the change submitted -with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShift v0.7.0)", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: +with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShift v0.8.3)", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: logger.info(f"Running changeset {changeset_id} for {len(df)} relations") logger.info(f"{'route_id':20s}{'shape_id':20s} | {'osm_id':20s} | {'osm_route_id':20s}{'updated?':10s}{'osm_shape_id':20s}{'updated?':10s} | {'operation status':20s}") for idx, row in df.iterrows(): From a6b4a8ac5a6bf2081f5dbc126fed265bb96004dc Mon Sep 17 00:00:00 2001 From: gmatosferreira Date: Tue, 21 Apr 2026 17:01:14 +0100 Subject: [PATCH 3/3] #73 osmapi methods updated to match v5.0.0 snake_case instead of previous versions CamelCase --- vignettes/osm_update.Rmd | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vignettes/osm_update.Rmd b/vignettes/osm_update.Rmd index 
db7db56..27ebc48 100644 --- a/vignettes/osm_update.Rmd +++ b/vignettes/osm_update.Rmd @@ -189,7 +189,7 @@ Attention! The script below will open a changeset and update OSM data! Make sure ```{python} # Create change set, updating relations with tag gtfs:shape_id # The changeset comment can be customized to better describe the change submitted -with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShift v0.8.3)", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: +with api.change_set({"comment": "GTFS shapes and routes association (using GTFShift v0.8.3)", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: logger.info(f"Running changeset {changeset_id} for {len(df)} relations") logger.info(f"{'route_id':20s}{'shape_id':20s} | {'osm_id':20s} | {'osm_route_id':20s}{'updated?':10s}{'osm_shape_id':20s}{'updated?':10s} | {'operation status':20s}") for idx, row in df.iterrows(): @@ -197,7 +197,7 @@ with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShi shape_id = str(row["shape_id"]) osm_id = int(row["osm_id"]) - relation = api.RelationGet(osm_id) + relation = api.relation_get(osm_id) relation_shape = str(relation["tag"]["gtfs:shape_id"]) if "gtfs:shape_id" in relation["tag"] else "-" relation_route = str(relation["tag"]["gtfs:route_id"]) if "gtfs:route_id" in relation["tag"] else "-" @@ -206,7 +206,7 @@ with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShi relation["tag"]["gtfs:shape_id"] = str(shape_id) # https://wiki.openstreetmap.org/wiki/Key:gtfs:shape_id if route_id: relation["tag"]["gtfs:route_id"] = str(route_id) # https://wiki.openstreetmap.org/wiki/Key:gtfs:route_id - update = api.RelationUpdate(relation) + update = api.relation_update(relation) status = "Updated" else: status = "Skipped" @@ -220,14 +220,14 @@ with api.Changeset({"comment": "GTFS shapes and routes association (using GTFShi If you need to rollback 
the changes, use the code below. ```{python} -with api.Changeset({"comment": "GTFS shapes association rollback", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: +with api.change_set({"comment": "GTFS shapes association rollback", "review_requested": "no", "locale": "pt", "source": "local knowledge"}) as changeset_id: for idx, row in df.iterrows(): osm_id = int(row["osm_id"]) - relation = api.RelationGet(osm_id) - relation_prev = api.RelationGet(osm_id, RelationVersion=relation["version"]-1) + relation = api.relation_get(osm_id) + relation_prev = api.relation_get(osm_id, relation["version"]-1) logger.info(f"{osm_id} {relation['tag']['gtfs:shape_id'] if 'gtfs:shape_id' in relation['tag'] else '-'} Current {relation['version']} Previous {relation_prev['version']}") relation_prev["version"] = relation["version"] # We need to set version to last to enable update - update = api.RelationUpdate(relation_prev) + update = api.relation_update(relation_prev) ``` ## Validate changes @@ -240,7 +240,7 @@ for idx, row in df.iterrows(): route_id = str(row["route_id"]) if "route_id" in row else None osm_id = int(row["osm_id"]) - relation = api.RelationGet(osm_id) + relation = api.relation_get(osm_id) relation_shape = str(relation["tag"]["gtfs:shape_id"]) if "gtfs:shape_id" in relation["tag"] else "-" relation_route = str(relation["tag"]["gtfs:route_id"]) if "gtfs:route_id" in relation["tag"] else "-"