apb_pandas_utils.oracle_pandas
1# coding=utf-8 2# # 3# Author: Ernesto Arredondo Martinez (ernestone@gmail.com) 4# File: oracle_pandas.py 5# Created: 05/04/2020, 17:41 6# Last modified: 10/11/2019, 11:24 7# Copyright (c) 2020 8 9import geopandas as gpd 10import pandas as pd 11from pandas.api.types import CategoricalDtype 12from functools import wraps 13 14try: 15 from apb_cx_oracle_spatial.gestor_oracle import sql_tab 16 _HAS_ORACLE_DEPS = True 17except ImportError: 18 _HAS_ORACLE_DEPS = False 19 20from . import optimize_df 21 22 23def requires_oracle_deps(func): 24 """Decorator to check if Oracle dependencies are installed""" 25 @wraps(func) 26 def wrapper(*args, **kwargs): 27 if not _HAS_ORACLE_DEPS: 28 raise ImportError( 29 "Oracle functionality requires additional dependencies. " 30 "Install with 'pip install apb_pandas_utils[oracle]'" 31 ) 32 return func(*args, **kwargs) 33 return wrapper 34 35 36@requires_oracle_deps 37def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None, 38 optimize=False, 39 **params_df): 40 """ 41 A partir de generator de gestor_oracle devuelve pandas.Dataframe 42 43 Args: 44 a_ora_generator: 45 columns_index: 46 columns: lista con nombre de columnas a mostrar 47 optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe 48 params_df: parametros creación Dataframe 49 50 Returns: 51 pandas.Dataframe 52 """ 53 df = pd.DataFrame(a_ora_generator, **params_df) 54 if not df.empty and columns_index: 55 df.set_index([c.upper() for c in columns_index], inplace=True) 56 57 if columns: 58 df = df[[c.upper() for c in columns if c in df.columns]] 59 60 if optimize: 61 df = optimize_df(df) 62 63 return df 64 65 66@requires_oracle_deps 67def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen): 68 """ 69 A partir de generator de gestor_oracle devuelve geopandas.Dataframe 70 71 Args: 72 a_ora_generator: 73 column_geom: 74 crs (str=None): name EPSG crs 75 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 76 77 Returns: 78 geopandas.Dataframe 79 """ 80 column_geom = column_geom.upper() 81 cols = params_df_for_sqlgen.pop('columns', []) 82 if cols: 83 params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom]) 84 85 gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator, 86 **params_df_for_sqlgen), 87 geometry=column_geom, 88 crs=crs) 89 90 return gdf 91 92 93@requires_oracle_deps 94def cols_sql(cols, *l_cols_add): 95 """ 96 Retorna lista de columnas 97 98 Args: 99 cols (list): 100 *l_cols_add: listas de columnas a añadir 101 102 Returns: 103 list 104 """ 105 if cols: 106 cols = [c.upper() for c in cols] 107 for added_cols in l_cols_add: 108 cols += list(map(str.upper, added_cols)) 109 110 return list(set(cols)) 111 112 113@requires_oracle_deps 114def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen): 115 """ 116 Devuelve pandas.DataFrame para una tablas de oracle 117 118 Args: 119 gest_ora: 120 nom_tab: 121 filter_sql: 122 *args_filter_sql: 123 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 124 125 Returns: 126 pandas.Dataframe 127 """ 128 dd_tab = gest_ora.get_dd_table(nom_tab) 129 cols = params_df_for_sqlgen.get("columns", []).copy() 130 cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk()) 131 132 df_tab = df_for_sqlgen( 133 gest_ora.generator_rows_sql(sql_tab(nom_tab, 134 filter_sql, 135 cols_sql(cols, 136 cols_idx)), 137 *args_filter_sql, 138 geom_format="as_geojson"), 139 columns_index=cols_idx, 140 **params_df_for_sqlgen) 141 142 if not cols: 143 df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]] 144 145 return df_tab 146 147 148@requires_oracle_deps 149def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, 150 filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen): 151 """ 152 Devuelve geopandas.GeoDataframe para una tablas de oracle 153 154 Args: 155 gest_ora: 156 nom_tab: 157 column_geom: 158 other_cols_geom: 159 null_geoms: 160 filter_sql: 161 *args_filter_sql: 162 **params_gdf_for_sqlgen: 163 164 Returns: 165 geopandas.GeoDataframe 166 """ 167 dd = gest_ora.get_dd_table(nom_tab) 168 column_geom = column_geom.upper() 169 cols = params_gdf_for_sqlgen.pop('columns', []) 170 if not other_cols_geom: 171 if not cols: 172 cols = [c for c in dd.alfas(include_pk=False)] 173 else: 174 other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()] 175 cols = [c for c in cols 176 if c.upper() not in other_geoms] 177 178 cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk()) 179 180 if not null_geoms: 181 not_null_geoms_sql = "{} is not null".format(column_geom) 182 if filter_sql: 183 filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql) 184 else: 185 filter_sql = not_null_geoms_sql 186 187 crs_epsg = None 188 if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \ 189 (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)): 190 crs_epsg = f'EPSG:{epsg_code}' 191 192 gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab, 193 filter_sql, 194 cols_sql(cols, 195 cols_idx, 196 [column_geom])), 197 *args_filter_sql, 198 geom_format="as_shapely"), 199 column_geom=column_geom, 200 crs=crs_epsg, 201 columns_index=cols_idx, 202 columns=cols, **params_gdf_for_sqlgen) 203 204 if not cols: 205 gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]] 206 207 return gdf_tab 208 209 210@requires_oracle_deps 211def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col): 212 """ 213 Retorna pandas.dtype de tipo categorico para la columna de una tabla 214 215 Args: 216 gest_ora: 217 nom_tab: 218 nom_col: 219 220 Returns: 221 pandas.CategoricalDtype 222 """ 223 return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
def
requires_oracle_deps(func):
24def requires_oracle_deps(func): 25 """Decorator to check if Oracle dependencies are installed""" 26 @wraps(func) 27 def wrapper(*args, **kwargs): 28 if not _HAS_ORACLE_DEPS: 29 raise ImportError( 30 "Oracle functionality requires additional dependencies. " 31 "Install with 'pip install apb_pandas_utils[oracle]'" 32 ) 33 return func(*args, **kwargs) 34 return wrapper
Decorator to check if Oracle dependencies are installed
@requires_oracle_deps
def
df_for_sqlgen( a_ora_generator, columns_index=None, columns=None, optimize=False, **params_df):
37@requires_oracle_deps 38def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None, 39 optimize=False, 40 **params_df): 41 """ 42 A partir de generator de gestor_oracle devuelve pandas.Dataframe 43 44 Args: 45 a_ora_generator: 46 columns_index: 47 columns: lista con nombre de columnas a mostrar 48 optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe 49 params_df: parametros creación Dataframe 50 51 Returns: 52 pandas.Dataframe 53 """ 54 df = pd.DataFrame(a_ora_generator, **params_df) 55 if not df.empty and columns_index: 56 df.set_index([c.upper() for c in columns_index], inplace=True) 57 58 if columns: 59 df = df[[c.upper() for c in columns if c in df.columns]] 60 61 if optimize: 62 df = optimize_df(df) 63 64 return df
A partir de generator de gestor_oracle devuelve pandas.Dataframe
Arguments:
- a_ora_generator:
- columns_index:
- columns: lista con nombre de columnas a mostrar
- optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
- params_df: parametros creación Dataframe
Returns:
pandas.Dataframe
@requires_oracle_deps
def
gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
67@requires_oracle_deps 68def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen): 69 """ 70 A partir de generator de gestor_oracle devuelve geopandas.Dataframe 71 72 Args: 73 a_ora_generator: 74 column_geom: 75 crs (str=None): name EPSG crs 76 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 77 78 Returns: 79 geopandas.Dataframe 80 """ 81 column_geom = column_geom.upper() 82 cols = params_df_for_sqlgen.pop('columns', []) 83 if cols: 84 params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom]) 85 86 gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator, 87 **params_df_for_sqlgen), 88 geometry=column_geom, 89 crs=crs) 90 91 return gdf
A partir de generator de gestor_oracle devuelve geopandas.Dataframe
Arguments:
- a_ora_generator:
- column_geom:
- crs (str=None): name EPSG crs
- **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:
geopandas.Dataframe
@requires_oracle_deps
def
cols_sql(cols, *l_cols_add):
94@requires_oracle_deps 95def cols_sql(cols, *l_cols_add): 96 """ 97 Retorna lista de columnas 98 99 Args: 100 cols (list): 101 *l_cols_add: listas de columnas a añadir 102 103 Returns: 104 list 105 """ 106 if cols: 107 cols = [c.upper() for c in cols] 108 for added_cols in l_cols_add: 109 cols += list(map(str.upper, added_cols)) 110 111 return list(set(cols))
Retorna lista de columnas
Arguments:
- cols (list):
- *l_cols_add: listas de columnas a añadir
Returns:
list
@requires_oracle_deps
def
df_table( gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
114@requires_oracle_deps 115def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen): 116 """ 117 Devuelve pandas.DataFrame para una tablas de oracle 118 119 Args: 120 gest_ora: 121 nom_tab: 122 filter_sql: 123 *args_filter_sql: 124 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 125 126 Returns: 127 pandas.Dataframe 128 """ 129 dd_tab = gest_ora.get_dd_table(nom_tab) 130 cols = params_df_for_sqlgen.get("columns", []).copy() 131 cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk()) 132 133 df_tab = df_for_sqlgen( 134 gest_ora.generator_rows_sql(sql_tab(nom_tab, 135 filter_sql, 136 cols_sql(cols, 137 cols_idx)), 138 *args_filter_sql, 139 geom_format="as_geojson"), 140 columns_index=cols_idx, 141 **params_df_for_sqlgen) 142 143 if not cols: 144 df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]] 145 146 return df_tab
Devuelve pandas.DataFrame para una tablas de oracle
Arguments:
- gest_ora:
- nom_tab:
- filter_sql:
- *args_filter_sql:
- **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:
pandas.Dataframe
@requires_oracle_deps
def
gdf_table( gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
149@requires_oracle_deps 150def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, 151 filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen): 152 """ 153 Devuelve geopandas.GeoDataframe para una tablas de oracle 154 155 Args: 156 gest_ora: 157 nom_tab: 158 column_geom: 159 other_cols_geom: 160 null_geoms: 161 filter_sql: 162 *args_filter_sql: 163 **params_gdf_for_sqlgen: 164 165 Returns: 166 geopandas.GeoDataframe 167 """ 168 dd = gest_ora.get_dd_table(nom_tab) 169 column_geom = column_geom.upper() 170 cols = params_gdf_for_sqlgen.pop('columns', []) 171 if not other_cols_geom: 172 if not cols: 173 cols = [c for c in dd.alfas(include_pk=False)] 174 else: 175 other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()] 176 cols = [c for c in cols 177 if c.upper() not in other_geoms] 178 179 cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk()) 180 181 if not null_geoms: 182 not_null_geoms_sql = "{} is not null".format(column_geom) 183 if filter_sql: 184 filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql) 185 else: 186 filter_sql = not_null_geoms_sql 187 188 crs_epsg = None 189 if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \ 190 (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)): 191 crs_epsg = f'EPSG:{epsg_code}' 192 193 gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab, 194 filter_sql, 195 cols_sql(cols, 196 cols_idx, 197 [column_geom])), 198 *args_filter_sql, 199 geom_format="as_shapely"), 200 column_geom=column_geom, 201 crs=crs_epsg, 202 columns_index=cols_idx, 203 columns=cols, **params_gdf_for_sqlgen) 204 205 if not cols: 206 gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]] 207 208 return gdf_tab
Devuelve geopandas.GeoDataframe para una tablas de oracle
Arguments:
- gest_ora:
- nom_tab:
- column_geom:
- other_cols_geom:
- null_geoms:
- filter_sql:
- *args_filter_sql:
- **params_gdf_for_sqlgen:
Returns:
geopandas.GeoDataframe
@requires_oracle_deps
def
dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
211@requires_oracle_deps 212def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col): 213 """ 214 Retorna pandas.dtype de tipo categorico para la columna de una tabla 215 216 Args: 217 gest_ora: 218 nom_tab: 219 nom_col: 220 221 Returns: 222 pandas.CategoricalDtype 223 """ 224 return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
Retorna pandas.dtype de tipo categorico para la columna de una tabla
Arguments:
- gest_ora:
- nom_tab:
- nom_col:
Returns:
pandas.CategoricalDtype