apb_pandas_utils.geopandas_utils
#   coding=utf-8
#
#  Author: Ernesto Arredondo Martinez (ernestone@gmail.com)
#  Created: 7/6/19 18:23
#  Last modified: 7/6/19 18:21
#  Copyright (c) 2019
import json
from typing import Optional

import requests
from geopandas import GeoDataFrame, GeoSeries
from pandas import DataFrame, Series
from shapely import wkt


def gdf_to_geojson(gdf: GeoDataFrame, name: Optional[str] = None, with_crs: bool = True, show_bbox: bool = True,
                   drop_id: bool = False, path_file: Optional[str] = None) -> dict:
    """
    Convert a GeoDataFrame into a GeoJSON dictionary.

    Args:
        gdf (GeoDataFrame): Source data.
        name (str=None): If truthy, stored under the top-level "name" key.
        with_crs (bool=True): If True and the CRS of the GeoDataFrame resolves to an authority code,
            a named "crs" member (OGC URN) is added to the dictionary.
        show_bbox (bool=True): Forwarded to GeoDataFrame.to_geo_dict().
        drop_id (bool=False): Forwarded to GeoDataFrame.to_geo_dict().
        path_file (str=None): If given, the GeoJSON is also written to this path (UTF-8).

    Returns:
        dict_geojson (dict)
    """
    dict_geojson = gdf.to_geo_dict(show_bbox=show_bbox, drop_id=drop_id)
    if name:
        dict_geojson["name"] = name

    if with_crs and gdf.crs is not None:
        # to_authority() returns None when the CRS matches no known authority
        # (e.g. a custom projection); the "crs" member is then omitted instead of failing.
        auth = gdf.crs.to_authority()
        if auth is not None:
            auth_name, auth_code = auth
            dict_geojson["crs"] = {"type": "name",
                                   "properties": {"name": f"urn:ogc:def:crs:{auth_name}::{auth_code}"}}

    if path_file:
        # Serialize before opening the file so a serialization error leaves no partial file behind.
        # default=str stringifies property values that json cannot encode natively.
        geojson = json.dumps(dict_geojson, default=str, ensure_ascii=False)
        with open(path_file, 'w', encoding='utf-8') as f:
            f.write(geojson)

    return dict_geojson


def gdf_to_df(gdf: GeoDataFrame, as_wkb: bool = False) -> DataFrame:
    """
    Convert a GeoDataFrame to a plain DataFrame, encoding every geometry column as WKT
    (or as WKB if as_wkb=True).

    Args:
        gdf (GeoDataFrame): Source data; it is not modified.
        as_wkb (bool=False): If True, the geometry columns are converted to WKB instead of WKT

    Returns:
        DataFrame
    """
    f_conv = 'to_wkb' if as_wkb else 'to_wkt'

    # Build the plain DataFrame first and fill it from the original GeoSeries: writing encoded
    # values into the active geometry column of a GeoDataFrame makes geopandas warn that the
    # geometry column does not contain geometry.
    df_out = DataFrame(gdf.copy())
    for col in df_geometry_columns(gdf):
        df_out[col] = getattr(gdf[col], f_conv)()
    return df_out


def df_geometry_columns(df: GeoDataFrame | DataFrame) -> list:
    """
    Return the names of the columns of the frame whose dtype is "geometry".

    Args:
        df (GeoDataFrame | DataFrame):

    Returns:
        list
    """
    geometry_part = df.select_dtypes(include=["geometry"])
    return geometry_part.columns.tolist()


def df_to_crs(df: GeoDataFrame | DataFrame, crs: str) -> GeoDataFrame | DataFrame:
    """
    Reproject every geometry column of a GeoDataFrame or DataFrame to the given CRS.

    Args:
        df (GeoDataFrame | DataFrame): Source data; a copy is reprojected and returned.
        crs (str): Target CRS of the geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]

    Returns:
        GeoDataFrame | DataFrame
    """
    df_aux = df.copy()
    for col in df_geometry_columns(df_aux):
        geoms = df_aux[col]
        # A geometry column read from a plain DataFrame comes back as a pandas Series,
        # which has no to_crs(); wrap it so both frame types are handled.
        if not isinstance(geoms, GeoSeries):
            geoms = GeoSeries(geoms)
        df_aux[col] = geoms.to_crs(crs)

    # DataFrame has no to_crs(); the frame-level call only applies to GeoDataFrames.
    if isinstance(df_aux, GeoDataFrame):
        df_aux = df_aux.to_crs(crs)

    return df_aux


def gdf_from_df(df: DataFrame, geom_col: str, crs: str, cols_geom: Optional[list[str]] = None) -> GeoDataFrame:
    """
    Build a GeoDataFrame from a DataFrame.

    Object-dtype geometry columns are parsed value by value: WKT strings are loaded,
    shapely geometries are kept as they are and anything else becomes None.

    Args:
        df (DataFrame): Source data; it is not modified.
        geom_col (str): Column that becomes the active geometry of the GeoDataFrame
        crs (str): Source CRS of the geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        cols_geom (list=None): Additional columns holding geometries

    Returns:
        GeoDataFrame
    """
    # Local import so the function does not depend on a new module-level import.
    from shapely.geometry.base import BaseGeometry

    names_geom = set(cols_geom or ())
    names_geom.add(geom_col)

    df_aux = df.copy()
    idx_names = df_aux.index.names
    # The index is only moved into columns (and restored afterwards) when all its levels are
    # named, because a named level may be one of the geometry columns.
    set_idx = None not in idx_names
    if set_idx:
        df_aux.reset_index(inplace=True)

    def parse_geom(val_col):
        # Keep geometries that are already parsed instead of discarding them.
        if isinstance(val_col, BaseGeometry):
            return val_col
        return wkt.loads(val_col) if isinstance(val_col, str) else None

    gdf = GeoDataFrame(df_aux)
    for col in [c for c in gdf.columns if c in names_geom]:
        ds_col = gdf[col]
        if isinstance(ds_col, GeoSeries):
            continue

        if ds_col.dtype.name == 'object':
            gdf[col] = ds_col.apply(parse_geom)

        # set_geometry converts the column to geometry dtype and attaches the CRS.
        gdf.set_geometry(col, inplace=True, crs=crs)

    if set_idx:
        gdf = gdf.set_index(idx_names, drop=True)

    # The loop leaves the last processed column active; make geom_col the active geometry.
    gdf.set_geometry(geom_col, crs=crs, inplace=True)

    return gdf


def gdf_from_url(url_rest_api, api_params=None, crs_api=None, headers=None, crs_gdf=None, add_goto_url=False):
    """
    Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.

    Each response is expected to be a JSON object holding a GeoJSON FeatureCollection, either
    directly or under a 'results' key, and optionally a top-level 'next' key with the full URL
    of the next page.

    Args:
        url_rest_api (str): The base URL of the API endpoint.
        api_params (dict, optional): Query parameters for the initial request.
        crs_api (str, optional): Source CRS of the geometries served by the API (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        headers (dict, optional): HTTP headers sent with every request.
        crs_gdf (str, optional): Target CRS of the returned geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        add_goto_url (bool, optional): If True, adds a 'goto_url' column with a Google Maps link
                    to the centroid of each geometry.

    Returns:
        gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages,
            or None if no features were returned.

    Raises:
        requests.HTTPError: If any request fails.
    """
    gdf = None
    all_features = []
    url = url_rest_api
    # Query parameters go only with the first request; 'next' links are followed as given.
    params = api_params or {}

    while url:
        response = requests.get(url, params=params, headers=headers)
        params = None

        response.raise_for_status()
        data = response.json()

        # The FeatureCollection is either the payload itself or nested under 'results'.
        all_features.extend(data.get('results', data).get('features', []))

        # Check for next page
        url = data.get('next')

    # Create GeoDataFrame from all features
    if all_features:
        gdf = GeoDataFrame.from_features(all_features, crs=crs_api)

        if add_goto_url:
            centroids = gdf.geometry.centroid
            # Geometries without a CRS (crs_api not given) cannot be reprojected.
            # NOTE(review): they are then assumed to be lon/lat already, the GeoJSON default
            # (RFC 7946) - confirm for APIs that serve projected coordinates.
            if centroids.crs is not None:
                centroids = centroids.to_crs('EPSG:4326')
            mask = centroids.notna()
            gdf['goto_url'] = Series([None] * len(gdf), index=gdf.index)
            gdf.loc[mask, 'goto_url'] = \
                ("https://www.google.com/maps?q=" +
                 centroids.loc[mask].y.astype(str) + "," +
                 centroids.loc[mask].x.astype(str))

        if crs_gdf:
            gdf = gdf.to_crs(crs_gdf)

    return gdf
def gdf_to_geojson(gdf: GeoDataFrame, name: Optional[str] = None, with_crs: bool = True, show_bbox: bool = True,
                   drop_id: bool = False, path_file: Optional[str] = None) -> dict:
    """
    Convert a GeoDataFrame into a GeoJSON dictionary.

    Args:
        gdf (GeoDataFrame): Source data.
        name (str=None): If truthy, stored under the top-level "name" key.
        with_crs (bool=True): If True and the CRS of the GeoDataFrame resolves to an authority code,
            a named "crs" member (OGC URN) is added to the dictionary.
        show_bbox (bool=True): Forwarded to GeoDataFrame.to_geo_dict().
        drop_id (bool=False): Forwarded to GeoDataFrame.to_geo_dict().
        path_file (str=None): If given, the GeoJSON is also written to this path (UTF-8).

    Returns:
        dict_geojson (dict)
    """
    dict_geojson = gdf.to_geo_dict(show_bbox=show_bbox, drop_id=drop_id)
    if name:
        dict_geojson["name"] = name

    if with_crs and gdf.crs is not None:
        # to_authority() returns None when the CRS matches no known authority
        # (e.g. a custom projection); the "crs" member is then omitted instead of failing.
        auth = gdf.crs.to_authority()
        if auth is not None:
            auth_name, auth_code = auth
            dict_geojson["crs"] = {"type": "name",
                                   "properties": {"name": f"urn:ogc:def:crs:{auth_name}::{auth_code}"}}

    if path_file:
        # Serialize before opening the file so a serialization error leaves no partial file behind.
        # default=str stringifies property values that json cannot encode natively.
        geojson = json.dumps(dict_geojson, default=str, ensure_ascii=False)
        with open(path_file, 'w', encoding='utf-8') as f:
            f.write(geojson)

    return dict_geojson
Convierte un GeoDataFrame a diccionario geojson
Arguments:
- gdf (GeoDataFrame):
- name (str=None):
- with_crs (bool=True):
- show_bbox (bool=True):
- drop_id (bool=False):
- path_file (str=None): Si se indica se guarda el geojson en el path indicado
Returns:
dict_geojson (dict)
def gdf_to_df(gdf: GeoDataFrame, as_wkb: bool = False) -> DataFrame:
    """
    Convert a GeoDataFrame to a plain DataFrame, encoding every geometry column as WKT
    (or as WKB if as_wkb=True).

    Args:
        gdf (GeoDataFrame): Source data; it is not modified.
        as_wkb (bool=False): If True, the geometry columns are converted to WKB instead of WKT

    Returns:
        DataFrame
    """
    f_conv = 'to_wkb' if as_wkb else 'to_wkt'

    # Build the plain DataFrame first and fill it from the original GeoSeries: writing encoded
    # values into the active geometry column of a GeoDataFrame makes geopandas warn that the
    # geometry column does not contain geometry.
    df_out = DataFrame(gdf.copy())
    for col in df_geometry_columns(gdf):
        df_out[col] = getattr(gdf[col], f_conv)()
    return df_out
Convert a GeoDataFrame to a DataFrame, converting the geometry columns to WKT strings (or to WKB bytes if as_wkb=True)
Arguments:
- gdf (GeoDataFrame):
- as_wkb (bool=False): If True, the geometry column is converted to WKB format
Returns:
DataFrame
def df_geometry_columns(df: GeoDataFrame | DataFrame) -> list:
    """
    Return the names of the columns of the frame whose dtype is "geometry".

    Args:
        df (GeoDataFrame | DataFrame):

    Returns:
        list
    """
    geometry_part = df.select_dtypes(include=["geometry"])
    return geometry_part.columns.tolist()
Devuelve las columnas tipo geometría de un GeoDataFrame
Arguments:
- df (GeoDataFrame | DataFrame):
Returns:
list
def df_to_crs(df: GeoDataFrame | DataFrame, crs: str) -> GeoDataFrame | DataFrame:
    """
    Reproject every geometry column of a GeoDataFrame or DataFrame to the given CRS.

    Args:
        df (GeoDataFrame | DataFrame): Source data; a copy is reprojected and returned.
        crs (str): Target CRS of the geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]

    Returns:
        GeoDataFrame | DataFrame
    """
    df_aux = df.copy()
    for col in df_geometry_columns(df_aux):
        geoms = df_aux[col]
        # A geometry column read from a plain DataFrame comes back as a pandas Series,
        # which has no to_crs(); wrap it so both frame types are handled.
        if not isinstance(geoms, GeoSeries):
            geoms = GeoSeries(geoms)
        df_aux[col] = geoms.to_crs(crs)

    # DataFrame has no to_crs(); the frame-level call only applies to GeoDataFrames.
    if isinstance(df_aux, GeoDataFrame):
        df_aux = df_aux.to_crs(crs)

    return df_aux
Convierte todas las columnas tipo geometría de un GeoDataFrame o DataFrame al CRS indicado
Arguments:
- df (GeoDataFrame | DataFrame):
- crs (str): nombre del CRS (EPSG) del sistema de coordenadas de destino de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
Returns:
GeoDataFrame | DataFrame
def gdf_from_df(df: DataFrame, geom_col: str, crs: str, cols_geom: Optional[list[str]] = None) -> GeoDataFrame:
    """
    Build a GeoDataFrame from a DataFrame.

    Object-dtype geometry columns are parsed value by value: WKT strings are loaded,
    shapely geometries are kept as they are and anything else becomes None.

    Args:
        df (DataFrame): Source data; it is not modified.
        geom_col (str): Column that becomes the active geometry of the GeoDataFrame
        crs (str): Source CRS of the geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        cols_geom (list=None): Additional columns holding geometries

    Returns:
        GeoDataFrame
    """
    # Local import so the function does not depend on a new module-level import.
    from shapely.geometry.base import BaseGeometry

    names_geom = set(cols_geom or ())
    names_geom.add(geom_col)

    df_aux = df.copy()
    idx_names = df_aux.index.names
    # The index is only moved into columns (and restored afterwards) when all its levels are
    # named, because a named level may be one of the geometry columns.
    set_idx = None not in idx_names
    if set_idx:
        df_aux.reset_index(inplace=True)

    def parse_geom(val_col):
        # Keep geometries that are already parsed instead of discarding them.
        if isinstance(val_col, BaseGeometry):
            return val_col
        return wkt.loads(val_col) if isinstance(val_col, str) else None

    gdf = GeoDataFrame(df_aux)
    for col in [c for c in gdf.columns if c in names_geom]:
        ds_col = gdf[col]
        if isinstance(ds_col, GeoSeries):
            continue

        if ds_col.dtype.name == 'object':
            gdf[col] = ds_col.apply(parse_geom)

        # set_geometry converts the column to geometry dtype and attaches the CRS.
        gdf.set_geometry(col, inplace=True, crs=crs)

    if set_idx:
        gdf = gdf.set_index(idx_names, drop=True)

    # The loop leaves the last processed column active; make geom_col the active geometry.
    gdf.set_geometry(geom_col, crs=crs, inplace=True)

    return gdf
Crea un GeoDataFrame a partir de un DataFrame
Arguments:
- df (DataFrame):
- geom_col (str): Columna geometría con el que se creará el GeoDataFrame
- crs (str): CRS (EPSG) del sistema de coordenadas de origen de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
- cols_geom (list=None): Columnas con geometrías
Returns:
GeoDataFrame
def gdf_from_url(url_rest_api, api_params=None, crs_api=None, headers=None, crs_gdf=None, add_goto_url=False):
    """
    Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.

    Each response is expected to be a JSON object holding a GeoJSON FeatureCollection, either
    directly or under a 'results' key, and optionally a top-level 'next' key with the full URL
    of the next page.

    Args:
        url_rest_api (str): The base URL of the API endpoint.
        api_params (dict, optional): Query parameters for the initial request.
        crs_api (str, optional): Source CRS of the geometries served by the API (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        headers (dict, optional): HTTP headers sent with every request.
        crs_gdf (str, optional): Target CRS of the returned geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        add_goto_url (bool, optional): If True, adds a 'goto_url' column with a Google Maps link
                    to the centroid of each geometry.

    Returns:
        gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages,
            or None if no features were returned.

    Raises:
        requests.HTTPError: If any request fails.
    """
    gdf = None
    all_features = []
    url = url_rest_api
    # Query parameters go only with the first request; 'next' links are followed as given.
    params = api_params or {}

    while url:
        response = requests.get(url, params=params, headers=headers)
        params = None

        response.raise_for_status()
        data = response.json()

        # The FeatureCollection is either the payload itself or nested under 'results'.
        all_features.extend(data.get('results', data).get('features', []))

        # Check for next page
        url = data.get('next')

    # Create GeoDataFrame from all features
    if all_features:
        gdf = GeoDataFrame.from_features(all_features, crs=crs_api)

        if add_goto_url:
            centroids = gdf.geometry.centroid
            # Geometries without a CRS (crs_api not given) cannot be reprojected.
            # NOTE(review): they are then assumed to be lon/lat already, the GeoJSON default
            # (RFC 7946) - confirm for APIs that serve projected coordinates.
            if centroids.crs is not None:
                centroids = centroids.to_crs('EPSG:4326')
            mask = centroids.notna()
            gdf['goto_url'] = Series([None] * len(gdf), index=gdf.index)
            gdf.loc[mask, 'goto_url'] = \
                ("https://www.google.com/maps?q=" +
                 centroids.loc[mask].y.astype(str) + "," +
                 centroids.loc[mask].x.astype(str))

        if crs_gdf:
            gdf = gdf.to_crs(crs_gdf)

    return gdf
Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.
Assumes the API returns a GeoJSON FeatureCollection with 'features' and optionally 'next' for pagination. If 'next' is present, it should be the full URL for the next page.
Arguments:
- url_rest_api (str): The base URL of the API endpoint.
- api_params (dict, optional): Query parameters for the initial request.
- crs_api (str, optional): Source CRS (EPSG) of the geometries served by the API (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
- headers (dict, optional): HTTP headers for the request.
- crs_gdf (str, optional): Target CRS (EPSG) of the returned geometries (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
- add_goto_url (bool, optional): If True, adds a 'goto_url' to the GeoDataFrame as new column.
Returns:
gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages.
Raises:
- requests.HTTPError: If any request fails.