apb_pandas_utils.oracle_pandas
1# coding=utf-8 2# # 3# Author: Ernesto Arredondo Martinez (ernestone@gmail.com) 4# File: oracle_pandas.py 5# Created: 05/04/2020, 17:41 6# Last modified: 10/11/2019, 11:24 7# Copyright (c) 2020 8 9import geopandas as gpd 10import pandas as pd 11from pandas.api.types import CategoricalDtype 12 13from apb_cx_oracle_spatial.gestor_oracle import sql_tab 14from . import optimize_df 15 16 17def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None, 18 optimize=False, 19 **params_df): 20 """ 21 A partir de generator de gestor_oracle devuelve pandas.Dataframe 22 23 Args: 24 a_ora_generator: 25 columns_index: 26 columns: lista con nombre de columnas a mostrar 27 optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe 28 params_df: parametros creación Dataframe 29 30 Returns: 31 pandas.Dataframe 32 """ 33 df = pd.DataFrame(a_ora_generator, **params_df) 34 if not df.empty and columns_index: 35 df.set_index([c.upper() for c in columns_index], inplace=True) 36 37 if columns: 38 df = df[[c.upper() for c in columns if c in df.columns]] 39 40 if optimize: 41 df = optimize_df(df) 42 43 return df 44 45 46def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen): 47 """ 48 A partir de generator de gestor_oracle devuelve geopandas.Dataframe 49 50 Args: 51 a_ora_generator: 52 column_geom: 53 crs (str=None): name EPSG crs 54 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 55 56 Returns: 57 geopandas.Dataframe 58 """ 59 column_geom = column_geom.upper() 60 cols = params_df_for_sqlgen.pop('columns', []) 61 if cols: 62 params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom]) 63 64 gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator, 65 **params_df_for_sqlgen), 66 geometry=column_geom, 67 crs=crs) 68 69 return gdf 70 71 72def cols_sql(cols, *l_cols_add): 73 """ 74 Retorna lista de columnas 75 76 Args: 77 cols (list): 78 *l_cols_add: listas de columnas a añadir 79 80 Returns: 81 list 82 """ 83 if cols: 84 cols = [c.upper() for c in cols] 85 for added_cols in l_cols_add: 86 cols += list(map(str.upper, added_cols)) 87 88 return list(set(cols)) 89 90 91def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen): 92 """ 93 Devuelve pandas.DataFrame para una tablas de oracle 94 95 Args: 96 gest_ora: 97 nom_tab: 98 filter_sql: 99 *args_filter_sql: 100 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 101 102 Returns: 103 pandas.Dataframe 104 """ 105 dd_tab = gest_ora.get_dd_table(nom_tab) 106 cols = params_df_for_sqlgen.get("columns", []).copy() 107 cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk()) 108 109 df_tab = df_for_sqlgen( 110 gest_ora.generator_rows_sql(sql_tab(nom_tab, 111 filter_sql, 112 cols_sql(cols, 113 cols_idx)), 114 *args_filter_sql, 115 geom_format="as_geojson"), 116 columns_index=cols_idx, 117 **params_df_for_sqlgen) 118 119 if not cols: 120 df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]] 121 122 return df_tab 123 124 125def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, 126 filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen): 127 """ 128 Devuelve geopandas.GeoDataframe para una tablas de oracle 129 130 Args: 131 gest_ora: 132 nom_tab: 133 column_geom: 134 other_cols_geom: 135 null_geoms: 136 filter_sql: 137 *args_filter_sql: 138 **params_gdf_for_sqlgen: 139 140 Returns: 141 geopandas.GeoDataframe 142 """ 143 dd = gest_ora.get_dd_table(nom_tab) 144 column_geom = column_geom.upper() 145 cols = params_gdf_for_sqlgen.pop('columns', []) 146 if not other_cols_geom: 147 if not cols: 148 cols = [c for c in dd.alfas(include_pk=False)] 149 else: 150 other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()] 151 cols = [c for c in cols 152 if c.upper() not in other_geoms] 153 154 cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk()) 155 156 if not null_geoms: 157 not_null_geoms_sql = "{} is not null".format(column_geom) 158 if filter_sql: 159 filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql) 160 else: 161 filter_sql = not_null_geoms_sql 162 163 crs_epsg = None 164 if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \ 165 (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)): 166 crs_epsg = f'EPSG:{epsg_code}' 167 168 gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab, 169 filter_sql, 170 cols_sql(cols, 171 cols_idx, 172 [column_geom])), 173 *args_filter_sql, 174 geom_format="as_shapely"), 175 column_geom=column_geom, 176 crs=crs_epsg, 177 columns_index=cols_idx, 178 columns=cols, **params_gdf_for_sqlgen) 179 180 if not cols: 181 gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]] 182 183 return gdf_tab 184 185 186def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col): 187 """ 188 Retorna pandas.dtype de tipo categorico para la columna de una tabla 189 190 Args: 191 gest_ora: 192 nom_tab: 193 nom_col: 194 195 Returns: 196 pandas.CategoricalDtype 197 """ 198 return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
def
df_for_sqlgen( a_ora_generator, columns_index=None, columns=None, optimize=False, **params_df):
18def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None, 19 optimize=False, 20 **params_df): 21 """ 22 A partir de generator de gestor_oracle devuelve pandas.Dataframe 23 24 Args: 25 a_ora_generator: 26 columns_index: 27 columns: lista con nombre de columnas a mostrar 28 optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe 29 params_df: parametros creación Dataframe 30 31 Returns: 32 pandas.Dataframe 33 """ 34 df = pd.DataFrame(a_ora_generator, **params_df) 35 if not df.empty and columns_index: 36 df.set_index([c.upper() for c in columns_index], inplace=True) 37 38 if columns: 39 df = df[[c.upper() for c in columns if c in df.columns]] 40 41 if optimize: 42 df = optimize_df(df) 43 44 return df
A partir de generator de gestor_oracle devuelve pandas.Dataframe
Arguments:
- a_ora_generator:
- columns_index:
- columns: lista con nombre de columnas a mostrar
- optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
- params_df: parametros creación Dataframe
Returns:
pandas.Dataframe
def
gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
47def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen): 48 """ 49 A partir de generator de gestor_oracle devuelve geopandas.Dataframe 50 51 Args: 52 a_ora_generator: 53 column_geom: 54 crs (str=None): name EPSG crs 55 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 56 57 Returns: 58 geopandas.Dataframe 59 """ 60 column_geom = column_geom.upper() 61 cols = params_df_for_sqlgen.pop('columns', []) 62 if cols: 63 params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom]) 64 65 gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator, 66 **params_df_for_sqlgen), 67 geometry=column_geom, 68 crs=crs) 69 70 return gdf
A partir de generator de gestor_oracle devuelve geopandas.Dataframe
Arguments:
- a_ora_generator:
- column_geom:
- crs (str=None): name EPSG crs
- **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:
geopandas.Dataframe
def
cols_sql(cols, *l_cols_add):
73def cols_sql(cols, *l_cols_add): 74 """ 75 Retorna lista de columnas 76 77 Args: 78 cols (list): 79 *l_cols_add: listas de columnas a añadir 80 81 Returns: 82 list 83 """ 84 if cols: 85 cols = [c.upper() for c in cols] 86 for added_cols in l_cols_add: 87 cols += list(map(str.upper, added_cols)) 88 89 return list(set(cols))
Retorna lista de columnas
Arguments:
- cols (list):
- *l_cols_add: listas de columnas a añadir
Returns:
list
def
df_table( gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
92def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen): 93 """ 94 Devuelve pandas.DataFrame para una tablas de oracle 95 96 Args: 97 gest_ora: 98 nom_tab: 99 filter_sql: 100 *args_filter_sql: 101 **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen() 102 103 Returns: 104 pandas.Dataframe 105 """ 106 dd_tab = gest_ora.get_dd_table(nom_tab) 107 cols = params_df_for_sqlgen.get("columns", []).copy() 108 cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk()) 109 110 df_tab = df_for_sqlgen( 111 gest_ora.generator_rows_sql(sql_tab(nom_tab, 112 filter_sql, 113 cols_sql(cols, 114 cols_idx)), 115 *args_filter_sql, 116 geom_format="as_geojson"), 117 columns_index=cols_idx, 118 **params_df_for_sqlgen) 119 120 if not cols: 121 df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]] 122 123 return df_tab
Devuelve pandas.DataFrame para una tablas de oracle
Arguments:
- gest_ora:
- nom_tab:
- filter_sql:
- *args_filter_sql:
- **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:
pandas.Dataframe
def
gdf_table( gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
126def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, 127 filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen): 128 """ 129 Devuelve geopandas.GeoDataframe para una tablas de oracle 130 131 Args: 132 gest_ora: 133 nom_tab: 134 column_geom: 135 other_cols_geom: 136 null_geoms: 137 filter_sql: 138 *args_filter_sql: 139 **params_gdf_for_sqlgen: 140 141 Returns: 142 geopandas.GeoDataframe 143 """ 144 dd = gest_ora.get_dd_table(nom_tab) 145 column_geom = column_geom.upper() 146 cols = params_gdf_for_sqlgen.pop('columns', []) 147 if not other_cols_geom: 148 if not cols: 149 cols = [c for c in dd.alfas(include_pk=False)] 150 else: 151 other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()] 152 cols = [c for c in cols 153 if c.upper() not in other_geoms] 154 155 cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk()) 156 157 if not null_geoms: 158 not_null_geoms_sql = "{} is not null".format(column_geom) 159 if filter_sql: 160 filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql) 161 else: 162 filter_sql = not_null_geoms_sql 163 164 crs_epsg = None 165 if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \ 166 (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)): 167 crs_epsg = f'EPSG:{epsg_code}' 168 169 gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab, 170 filter_sql, 171 cols_sql(cols, 172 cols_idx, 173 [column_geom])), 174 *args_filter_sql, 175 geom_format="as_shapely"), 176 column_geom=column_geom, 177 crs=crs_epsg, 178 columns_index=cols_idx, 179 columns=cols, **params_gdf_for_sqlgen) 180 181 if not cols: 182 gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]] 183 184 return gdf_tab
Devuelve geopandas.GeoDataframe para una tablas de oracle
Arguments:
- gest_ora:
- nom_tab:
- column_geom:
- other_cols_geom:
- null_geoms:
- filter_sql:
- *args_filter_sql:
- **params_gdf_for_sqlgen:
Returns:
geopandas.GeoDataframe
def
dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
187def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col): 188 """ 189 Retorna pandas.dtype de tipo categorico para la columna de una tabla 190 191 Args: 192 gest_ora: 193 nom_tab: 194 nom_col: 195 196 Returns: 197 pandas.CategoricalDtype 198 """ 199 return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
Retorna pandas.dtype de tipo categorico para la columna de una tabla
Arguments:
- gest_ora:
- nom_tab:
- nom_col:
Returns:
pandas.CategoricalDtype