apb_pandas_utils.geopandas_utils

  1#  coding=utf-8
  2#
  3#  Author: Ernesto Arredondo Martinez (ernestone@gmail.com)
  4#  Created: 7/6/19 18:23
  5#  Last modified: 7/6/19 18:21
  6#  Copyright (c) 2019
  7import json
  8from typing import Optional
  9
 10import requests
 11from geopandas import GeoDataFrame, GeoSeries
 12from pandas import DataFrame, Series
 13from shapely import wkt
 14
 15
 16def gdf_to_geojson(gdf: GeoDataFrame, name: Optional[str] = None, with_crs: bool = True, show_bbox: bool = True,
 17                   drop_id: bool = False, path_file: str = None) -> dict:
 18    """
 19    Convierte un GeoDataFrame a diccionario geojson
 20
 21    Args:
 22        gdf (GeoDataFrame):
 23        name (str=None):
 24        with_crs (bool=True):
 25        show_bbox (bool=True):
 26        drop_id (bool=False):
 27        path_file (str=None): Si se indica se guarda el geojson en el path indicado
 28
 29    Returns:
 30        dict_geojson (dict)
 31    """
 32    dict_geojson = gdf.to_geo_dict(show_bbox=show_bbox, drop_id=drop_id)
 33    if name:
 34        dict_geojson["name"] = name
 35    if with_crs and gdf.crs is not None:
 36        auth = gdf.crs.to_authority()
 37        dict_geojson["crs"] = {"type": "name", "properties": {"name": f"urn:ogc:def:crs:{auth[0]}::{auth[1]}"}}
 38
 39    if path_file:
 40        geojson = json.dumps(dict_geojson, default=str, ensure_ascii=False)
 41        with open(path_file, 'w', encoding='utf-8') as f:
 42            f.write(geojson)
 43
 44    return dict_geojson
 45
 46
 47def gdf_to_df(gdf: GeoDataFrame, as_wkb=False) -> DataFrame:
 48    """
 49    Convert a GeoDataFrame to DataFrame converting the geometry columns to a str column in WKT format (WKB if as_wkb=True)
 50
 51    Args:
 52        gdf (GeoDataFrame):
 53        as_wkb (bool=False): If True, the geometry column is converted to WKB format
 54
 55    Returns:
 56        DataFrame
 57    """
 58    f_conv = 'to_wkb' if as_wkb else 'to_wkt'
 59
 60    # Convert all columns type geometry to WKT
 61    gdf_aux = gdf.copy()
 62    for col in df_geometry_columns(gdf_aux):
 63        gdf_aux[col] = getattr(gdf_aux[col], f_conv)()
 64    return DataFrame(gdf_aux)
 65
 66
 67def df_geometry_columns(df: GeoDataFrame | DataFrame) -> list:
 68    """
 69    Devuelve las columnas tipo geometría de un GeoDataFrame
 70
 71    Args:
 72        df (GeoDataFrame | DataFrame):
 73
 74    Returns:
 75        list
 76    """
 77    return df.select_dtypes(include=["geometry"]).columns.tolist()
 78
 79
 80def df_to_crs(df: GeoDataFrame | DataFrame, crs: str) -> GeoDataFrame | DataFrame:
 81    """
 82    Convierte todas las columnas tipo geometría de un GeoDataFrame o DataFrame al CRS indicado
 83
 84    Args:
 85        df (GeoDataFrame | DataFrame):
 86        crs (str): name CRS (EPSG) coord .sys. destino de las geometrías (e.g. 'EPSG:25831')
 87                    [Can be anything accepted by pyproj.CRS.from_user_input()]
 88
 89    Returns:
 90        GeoDataFrame | DataFrame
 91    """
 92    df_aux = df.copy()
 93    for geom in df_geometry_columns(df_aux):
 94        df_aux[geom] = df_aux[geom].to_crs(crs)
 95
 96    df_aux = df_aux.to_crs(crs)
 97
 98    return df_aux
 99
100
def gdf_from_df(df: DataFrame, geom_col: str, crs: str, cols_geom: list[str] = None) -> GeoDataFrame:
    """
    Build a GeoDataFrame from a DataFrame.

    Object columns listed as geometry columns are converted value by value: WKT strings are parsed,
    shapely geometry objects are kept as they are, and anything else becomes a missing geometry.
    The input frame is not modified.

    Args:
        df (DataFrame): Source frame.
        geom_col (str): Column that becomes the active geometry of the GeoDataFrame.
        crs (str): CRS of the source geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        cols_geom (list=None): Additional columns holding geometries.

    Returns:
        GeoDataFrame
    """
    # Local import: shapely is already a dependency of this module (``wkt``).
    from shapely.geometry.base import BaseGeometry

    geom_names = set(cols_geom or ())
    geom_names.add(geom_col)

    df_aux = df.copy()
    idx_prev = df_aux.index
    # Only a fully named index is handled: its levels are moved to regular columns (they may be
    # among the geometry columns) and restored as the index once the conversion is done.
    set_idx = None not in idx_prev.names
    if set_idx:
        df_aux.reset_index(inplace=True)

    def to_geometry(val_col):
        if isinstance(val_col, str):
            return wkt.loads(val_col)
        # Geometry objects already present used to be replaced by None, silently losing data.
        if isinstance(val_col, BaseGeometry):
            return val_col
        return None

    gdf = GeoDataFrame(df_aux)
    for col in (col for col in gdf.columns if col in geom_names):
        ds_col = gdf[col]
        if isinstance(ds_col, GeoSeries):
            continue

        if ds_col.dtype.name == 'object':
            gdf[col] = ds_col.apply(to_geometry)

        # Registers the column as a geometry column tagged with ``crs``; the definitive active
        # geometry is chosen after the loop.
        gdf.set_geometry(col, inplace=True, crs=crs)

    if set_idx:
        gdf = gdf.set_index(idx_prev.names, drop=True)

    gdf.set_geometry(geom_col, crs=crs, inplace=True)

    return gdf
148
149
def gdf_from_url(url_rest_api, api_params=None, crs_api=None, headers=None, crs_gdf=None, add_goto_url=False,
                 timeout=None):
    """
    Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.

    Each page is expected to be a JSON object holding a GeoJSON FeatureCollection, either directly or
    under a 'results' key, and optionally a 'next' key with the full URL of the following page.

    Args:
        url_rest_api (str): The base URL of the API endpoint.
        api_params (dict, optional): Query parameters, sent with the first request only.
        crs_api (str, optional): CRS of the geometries returned by the API (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        headers (dict, optional): HTTP headers sent with every request.
        crs_gdf (str, optional): CRS the resulting geometries are reprojected to (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        add_goto_url (bool, optional): If True, adds a 'goto_url' column with a Google Maps link to the
                    centroid of each geometry (None where there is no centroid).
        timeout (float | tuple, optional): Passed to ``requests.get`` for every page. The default
                    None keeps the previous behaviour of waiting indefinitely.

    Returns:
        gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages, or None when
        no features were returned.

    Raises:
        requests.HTTPError: If any request fails.
    """
    all_features = []
    url = url_rest_api
    params = api_params or {}

    while url:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        # 'next' links are full URLs, so the query parameters are only sent once.
        params = None

        response.raise_for_status()
        data = response.json()

        # The FeatureCollection is either nested under 'results' or is the payload itself.
        all_features.extend(data.get('results', data).get('features', []))

        # Check for next page
        url = data.get('next')

    # Without features there is no frame to build; returning here keeps the optional steps below
    # from dereferencing None (they used to raise AttributeError).
    if not all_features:
        return None

    gdf = GeoDataFrame.from_features(all_features, crs=crs_api)

    if add_goto_url:
        # Centroids are taken in the source CRS and then expressed as lon/lat for the map link.
        centroids = gdf.geometry.centroid.to_crs('EPSG:4326')
        mask = centroids.notna()
        gdf['goto_url'] = Series([None] * len(gdf), index=gdf.index)
        # Google Maps expects "latitude,longitude", i.e. y before x.
        gdf.loc[mask, 'goto_url'] = \
            ("https://www.google.com/maps?q=" +
             centroids.loc[mask].y.astype(str) + "," +
             centroids.loc[mask].x.astype(str))

    if crs_gdf:
        gdf = gdf.to_crs(crs_gdf)

    return gdf
def gdf_to_geojson( gdf: geopandas.geodataframe.GeoDataFrame, name: Optional[str] = None, with_crs: bool = True, show_bbox: bool = True, drop_id: bool = False, path_file: str = None) -> dict:
def gdf_to_geojson(gdf: GeoDataFrame, name: Optional[str] = None, with_crs: bool = True, show_bbox: bool = True,
                   drop_id: bool = False, path_file: Optional[str] = None) -> dict:
    """
    Convert a GeoDataFrame to a GeoJSON dictionary, optionally saving it to disk.

    Args:
        gdf (GeoDataFrame): Frame to export.
        name (str=None): If truthy, stored under the top-level "name" key.
        with_crs (bool=True): If True and the frame's CRS resolves to an authority code, a named "crs"
                    member of the form ``urn:ogc:def:crs:<authority>::<code>`` is added. A CRS without
                    a matching authority is left out instead of raising.
        show_bbox (bool=True): Forwarded to ``GeoDataFrame.to_geo_dict``.
        drop_id (bool=False): Forwarded to ``GeoDataFrame.to_geo_dict``.
        path_file (str=None): If given, the GeoJSON is also written to this path (UTF-8).

    Returns:
        dict_geojson (dict)
    """
    dict_geojson = gdf.to_geo_dict(show_bbox=show_bbox, drop_id=drop_id)
    if name:
        dict_geojson["name"] = name

    if with_crs and gdf.crs is not None:
        # to_authority() returns None when no authority entry matches the CRS (e.g. a custom
        # projection); indexing that result unconditionally raised TypeError.
        authority = gdf.crs.to_authority()
        if authority is not None:
            auth_name, auth_code = authority
            dict_geojson["crs"] = {
                "type": "name",
                "properties": {"name": f"urn:ogc:def:crs:{auth_name}::{auth_code}"},
            }

    if path_file:
        # Serialize before opening the file so a serialization error cannot leave a truncated file.
        # default=str stringifies any value json cannot encode natively.
        geojson = json.dumps(dict_geojson, default=str, ensure_ascii=False)
        with open(path_file, 'w', encoding='utf-8') as f:
            f.write(geojson)

    return dict_geojson

Convierte un GeoDataFrame a diccionario geojson

Arguments:
  • gdf (GeoDataFrame):
  • name (str=None):
  • with_crs (bool=True):
  • show_bbox (bool=True):
  • drop_id (bool=False):
  • path_file (str=None): Si se indica se guarda el geojson en el path indicado
Returns:

dict_geojson (dict)

def gdf_to_df( gdf: geopandas.geodataframe.GeoDataFrame, as_wkb=False) -> pandas.core.frame.DataFrame:
def gdf_to_df(gdf: GeoDataFrame, as_wkb=False) -> DataFrame:
    """
    Build a plain DataFrame from a GeoDataFrame, serializing every geometry column.

    Geometry columns are written as WKT by default, or as WKB when ``as_wkb`` is True.
    The input frame is not modified.

    Args:
        gdf (GeoDataFrame): Frame whose geometry columns are serialized.
        as_wkb (bool=False): If True, geometry columns are converted with ``to_wkb`` instead of ``to_wkt``.

    Returns:
        DataFrame
    """
    serialized = gdf.copy()
    for geom_name in df_geometry_columns(serialized):
        geom_values = serialized[geom_name]
        if as_wkb:
            serialized[geom_name] = geom_values.to_wkb()
        else:
            serialized[geom_name] = geom_values.to_wkt()

    return DataFrame(serialized)

Convert a GeoDataFrame to DataFrame converting the geometry columns to a str column in WKT format (WKB if as_wkb=True)

Arguments:
  • gdf (GeoDataFrame):
  • as_wkb (bool=False): If True, the geometry column is converted to WKB format
Returns:

DataFrame

def df_geometry_columns( df: geopandas.geodataframe.GeoDataFrame | pandas.core.frame.DataFrame) -> list:
def df_geometry_columns(df: GeoDataFrame | DataFrame) -> list:
    """
    List the names of the columns of a frame whose dtype is "geometry".

    Args:
        df (GeoDataFrame | DataFrame): Frame to inspect.

    Returns:
        list: Column labels, in the order ``select_dtypes`` returns them.
    """
    geometry_only = df.select_dtypes(include=["geometry"])
    geometry_labels = geometry_only.columns
    return geometry_labels.tolist()

Devuelve las columnas tipo geometría de un GeoDataFrame

Arguments:
  • df (GeoDataFrame | DataFrame):
Returns:

list

def df_to_crs( df: geopandas.geodataframe.GeoDataFrame | pandas.core.frame.DataFrame, crs: str) -> geopandas.geodataframe.GeoDataFrame | pandas.core.frame.DataFrame:
def df_to_crs(df: GeoDataFrame | DataFrame, crs: str) -> GeoDataFrame | DataFrame:
    """
    Reproject every geometry-typed column of a GeoDataFrame or DataFrame to the given CRS.

    The input is not modified; the work is done on a copy, which is returned.

    Args:
        df (GeoDataFrame | DataFrame): Frame holding one or more geometry columns.
        crs (str): Target CRS of the geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]

    Returns:
        GeoDataFrame | DataFrame: Copy of ``df`` with its geometry columns expressed in ``crs``.
    """
    df_aux = df.copy()
    for geom in df_geometry_columns(df_aux):
        geom_values = df_aux[geom]
        # A plain DataFrame may hand back a pandas Series, which has no to_crs(); wrapping it lets
        # both frame types go through the same GeoSeries reprojection.
        if not isinstance(geom_values, GeoSeries):
            geom_values = GeoSeries(geom_values)
        df_aux[geom] = geom_values.to_crs(crs)

    # Frame-level to_crs() exists only on GeoDataFrame; calling it unconditionally raised
    # AttributeError for the plain DataFrame input the signature allows.
    if isinstance(df_aux, GeoDataFrame):
        df_aux = df_aux.to_crs(crs)

    return df_aux

Convierte todas las columnas tipo geometría de un GeoDataFrame o DataFrame al CRS indicado

Arguments:
  • df (GeoDataFrame | DataFrame):
  • crs (str): name CRS (EPSG) coord. sys. destino de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
Returns:

GeoDataFrame | DataFrame

def gdf_from_df( df: pandas.core.frame.DataFrame, geom_col: str, crs: str, cols_geom: list[str] = None) -> geopandas.geodataframe.GeoDataFrame:
def gdf_from_df(df: DataFrame, geom_col: str, crs: str, cols_geom: list[str] = None) -> GeoDataFrame:
    """
    Build a GeoDataFrame from a DataFrame.

    Object columns listed as geometry columns are converted value by value: WKT strings are parsed,
    shapely geometry objects are kept as they are, and anything else becomes a missing geometry.
    The input frame is not modified.

    Args:
        df (DataFrame): Source frame.
        geom_col (str): Column that becomes the active geometry of the GeoDataFrame.
        crs (str): CRS of the source geometries (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        cols_geom (list=None): Additional columns holding geometries.

    Returns:
        GeoDataFrame
    """
    # Local import: shapely is already a dependency of this module (``wkt``).
    from shapely.geometry.base import BaseGeometry

    geom_names = set(cols_geom or ())
    geom_names.add(geom_col)

    df_aux = df.copy()
    idx_prev = df_aux.index
    # Only a fully named index is handled: its levels are moved to regular columns (they may be
    # among the geometry columns) and restored as the index once the conversion is done.
    set_idx = None not in idx_prev.names
    if set_idx:
        df_aux.reset_index(inplace=True)

    def to_geometry(val_col):
        if isinstance(val_col, str):
            return wkt.loads(val_col)
        # Geometry objects already present used to be replaced by None, silently losing data.
        if isinstance(val_col, BaseGeometry):
            return val_col
        return None

    gdf = GeoDataFrame(df_aux)
    for col in (col for col in gdf.columns if col in geom_names):
        ds_col = gdf[col]
        if isinstance(ds_col, GeoSeries):
            continue

        if ds_col.dtype.name == 'object':
            gdf[col] = ds_col.apply(to_geometry)

        # Registers the column as a geometry column tagged with ``crs``; the definitive active
        # geometry is chosen after the loop.
        gdf.set_geometry(col, inplace=True, crs=crs)

    if set_idx:
        gdf = gdf.set_index(idx_prev.names, drop=True)

    gdf.set_geometry(geom_col, crs=crs, inplace=True)

    return gdf

Crea un GeoDataFrame a partir de un DataFrame

Arguments:
  • df (DataFrame):
  • geom_col (str): Columna geometría con el que se creará el GeoDataFrame
  • crs (str): CRS (EPSG) coord. sys. origen de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
  • cols_geom (list=None): Columnas con geometrías
Returns:

GeoDataFrame

def gdf_from_url( url_rest_api, api_params=None, crs_api=None, headers=None, crs_gdf=None, add_goto_url=False):
def gdf_from_url(url_rest_api, api_params=None, crs_api=None, headers=None, crs_gdf=None, add_goto_url=False,
                 timeout=None):
    """
    Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.

    Each page is expected to be a JSON object holding a GeoJSON FeatureCollection, either directly or
    under a 'results' key, and optionally a 'next' key with the full URL of the following page.

    Args:
        url_rest_api (str): The base URL of the API endpoint.
        api_params (dict, optional): Query parameters, sent with the first request only.
        crs_api (str, optional): CRS of the geometries returned by the API (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        headers (dict, optional): HTTP headers sent with every request.
        crs_gdf (str, optional): CRS the resulting geometries are reprojected to (e.g. 'EPSG:25831')
                    [Can be anything accepted by pyproj.CRS.from_user_input()]
        add_goto_url (bool, optional): If True, adds a 'goto_url' column with a Google Maps link to the
                    centroid of each geometry (None where there is no centroid).
        timeout (float | tuple, optional): Passed to ``requests.get`` for every page. The default
                    None keeps the previous behaviour of waiting indefinitely.

    Returns:
        gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages, or None when
        no features were returned.

    Raises:
        requests.HTTPError: If any request fails.
    """
    all_features = []
    url = url_rest_api
    params = api_params or {}

    while url:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        # 'next' links are full URLs, so the query parameters are only sent once.
        params = None

        response.raise_for_status()
        data = response.json()

        # The FeatureCollection is either nested under 'results' or is the payload itself.
        all_features.extend(data.get('results', data).get('features', []))

        # Check for next page
        url = data.get('next')

    # Without features there is no frame to build; returning here keeps the optional steps below
    # from dereferencing None (they used to raise AttributeError).
    if not all_features:
        return None

    gdf = GeoDataFrame.from_features(all_features, crs=crs_api)

    if add_goto_url:
        # Centroids are taken in the source CRS and then expressed as lon/lat for the map link.
        centroids = gdf.geometry.centroid.to_crs('EPSG:4326')
        mask = centroids.notna()
        gdf['goto_url'] = Series([None] * len(gdf), index=gdf.index)
        # Google Maps expects "latitude,longitude", i.e. y before x.
        gdf.loc[mask, 'goto_url'] = \
            ("https://www.google.com/maps?q=" +
             centroids.loc[mask].y.astype(str) + "," +
             centroids.loc[mask].x.astype(str))

    if crs_gdf:
        gdf = gdf.to_crs(crs_gdf)

    return gdf

Fetch paginated GeoJSON from a REST API and return a GeoPandas GeoDataFrame.

Assumes the API returns a GeoJSON FeatureCollection with 'features' and optionally 'next' for pagination. If 'next' is present, it should be the full URL for the next page.

Arguments:
  • url_rest_api (str): The base URL of the API endpoint.
  • api_params (dict, optional): Query parameters for the initial request.
  • crs_api (str, optional): CRS (EPSG) coord. sys. origen de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
  • headers (dict, optional): HTTP headers for the request.
  • crs_gdf (str, optional): CRS (EPSG) coord. sys. destino de las geometrías (e.g. 'EPSG:25831') [Can be anything accepted by pyproj.CRS.from_user_input()]
  • add_goto_url (bool, optional): If True, adds a 'goto_url' to the GeoDataFrame as new column.
Returns:

gpd.GeoDataFrame | None: A GeoDataFrame containing all features from all pages.

Raises:
  • requests.HTTPError: If any request fails.