apb_pandas_utils.oracle_pandas

  1#   coding=utf-8
  2#  #
  3#   Author: Ernesto Arredondo Martinez (ernestone@gmail.com)
  4#   File: oracle_pandas.py
  5#   Created: 05/04/2020, 17:41
  6#   Last modified: 10/11/2019, 11:24
  7#   Copyright (c) 2020
  8
  9import geopandas as gpd
 10import pandas as pd
 11from pandas.api.types import CategoricalDtype
 12from functools import wraps
 13
 14try:
 15    from apb_cx_oracle_spatial.gestor_oracle import sql_tab
 16    _HAS_ORACLE_DEPS = True
 17except ImportError:
 18    _HAS_ORACLE_DEPS = False
 19
 20from . import optimize_df
 21
 22
 23def requires_oracle_deps(func):
 24    """Decorator to check if Oracle dependencies are installed"""
 25    @wraps(func)
 26    def wrapper(*args, **kwargs):
 27        if not _HAS_ORACLE_DEPS:
 28            raise ImportError(
 29                "Oracle functionality requires additional dependencies. "
 30                "Install with 'pip install apb_pandas_utils[oracle]'"
 31            )
 32        return func(*args, **kwargs)
 33    return wrapper
 34
 35
 36@requires_oracle_deps
 37def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None,
 38                  optimize=False,
 39                  **params_df):
 40    """
 41    A partir de generator de gestor_oracle devuelve pandas.Dataframe
 42
 43    Args:
 44        a_ora_generator:
 45        columns_index:
 46        columns: lista con nombre de columnas a mostrar
 47        optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
 48        params_df: parametros creación Dataframe
 49
 50    Returns:
 51        pandas.Dataframe
 52    """
 53    df = pd.DataFrame(a_ora_generator, **params_df)
 54    if not df.empty and columns_index:
 55        df.set_index([c.upper() for c in columns_index], inplace=True)
 56
 57    if columns:
 58        df = df[[c.upper() for c in columns if c in df.columns]]
 59
 60    if optimize:
 61        df = optimize_df(df)
 62
 63    return df
 64
 65
 66@requires_oracle_deps
 67def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
 68    """
 69    A partir de generator de gestor_oracle devuelve geopandas.Dataframe
 70
 71    Args:
 72        a_ora_generator:
 73        column_geom:
 74        crs (str=None): name EPSG crs
 75        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
 76
 77    Returns:
 78        geopandas.Dataframe
 79    """
 80    column_geom = column_geom.upper()
 81    cols = params_df_for_sqlgen.pop('columns', [])
 82    if cols:
 83        params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom])
 84
 85    gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator,
 86                                         **params_df_for_sqlgen),
 87                           geometry=column_geom,
 88                           crs=crs)
 89
 90    return gdf
 91
 92
 93@requires_oracle_deps
 94def cols_sql(cols, *l_cols_add):
 95    """
 96    Retorna lista de columnas
 97
 98    Args:
 99        cols (list):
100        *l_cols_add: listas de columnas a añadir
101
102    Returns:
103        list
104    """
105    if cols:
106        cols = [c.upper() for c in cols]
107        for added_cols in l_cols_add:
108            cols += list(map(str.upper, added_cols))
109
110        return list(set(cols))
111
112
113@requires_oracle_deps
114def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
115    """
116    Devuelve pandas.DataFrame para una tablas de oracle
117
118    Args:
119        gest_ora:
120        nom_tab:
121        filter_sql:
122        *args_filter_sql:
123        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
124
125    Returns:
126        pandas.Dataframe
127    """
128    dd_tab = gest_ora.get_dd_table(nom_tab)
129    cols = params_df_for_sqlgen.get("columns", []).copy()
130    cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk())
131
132    df_tab = df_for_sqlgen(
133        gest_ora.generator_rows_sql(sql_tab(nom_tab,
134                                            filter_sql,
135                                            cols_sql(cols,
136                                                     cols_idx)),
137                                    *args_filter_sql,
138                                    geom_format="as_geojson"),
139        columns_index=cols_idx,
140        **params_df_for_sqlgen)
141
142    if not cols:
143        df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]]
144
145    return df_tab
146
147
148@requires_oracle_deps
149def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False,
150              filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
151    """
152    Devuelve geopandas.GeoDataframe para una tablas de oracle
153
154    Args:
155        gest_ora:
156        nom_tab:
157        column_geom:
158        other_cols_geom:
159        null_geoms:
160        filter_sql:
161        *args_filter_sql:
162        **params_gdf_for_sqlgen:
163
164    Returns:
165        geopandas.GeoDataframe
166    """
167    dd = gest_ora.get_dd_table(nom_tab)
168    column_geom = column_geom.upper()
169    cols = params_gdf_for_sqlgen.pop('columns', [])
170    if not other_cols_geom:
171        if not cols:
172            cols = [c for c in dd.alfas(include_pk=False)]
173        else:
174            other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()]
175            cols = [c for c in cols
176                    if c.upper() not in other_geoms]
177
178    cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk())
179
180    if not null_geoms:
181        not_null_geoms_sql = "{} is not null".format(column_geom)
182        if filter_sql:
183            filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql)
184        else:
185            filter_sql = not_null_geoms_sql
186
187    crs_epsg = None
188    if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \
189            (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)):
190        crs_epsg = f'EPSG:{epsg_code}'
191
192    gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab,
193                                                                 filter_sql,
194                                                                 cols_sql(cols,
195                                                                          cols_idx,
196                                                                          [column_geom])),
197                                                         *args_filter_sql,
198                                                         geom_format="as_shapely"),
199                             column_geom=column_geom,
200                             crs=crs_epsg,
201                             columns_index=cols_idx,
202                             columns=cols, **params_gdf_for_sqlgen)
203
204    if not cols:
205        gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]]
206
207    return gdf_tab
208
209
210@requires_oracle_deps
211def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
212    """
213    Retorna pandas.dtype de tipo categorico para la columna de una tabla
214
215    Args:
216        gest_ora:
217        nom_tab:
218        nom_col:
219
220    Returns:
221        pandas.CategoricalDtype
222    """
223    return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
def requires_oracle_deps(func):
24def requires_oracle_deps(func):
25    """Decorator to check if Oracle dependencies are installed"""
26    @wraps(func)
27    def wrapper(*args, **kwargs):
28        if not _HAS_ORACLE_DEPS:
29            raise ImportError(
30                "Oracle functionality requires additional dependencies. "
31                "Install with 'pip install apb_pandas_utils[oracle]'"
32            )
33        return func(*args, **kwargs)
34    return wrapper

Decorator to check if Oracle dependencies are installed

@requires_oracle_deps
def df_for_sqlgen( a_ora_generator, columns_index=None, columns=None, optimize=False, **params_df):
37@requires_oracle_deps
38def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None,
39                  optimize=False,
40                  **params_df):
41    """
42    A partir de generator de gestor_oracle devuelve pandas.Dataframe
43
44    Args:
45        a_ora_generator:
46        columns_index:
47        columns: lista con nombre de columnas a mostrar
48        optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
49        params_df: parametros creación Dataframe
50
51    Returns:
52        pandas.Dataframe
53    """
54    df = pd.DataFrame(a_ora_generator, **params_df)
55    if not df.empty and columns_index:
56        df.set_index([c.upper() for c in columns_index], inplace=True)
57
58    if columns:
59        df = df[[c.upper() for c in columns if c in df.columns]]
60
61    if optimize:
62        df = optimize_df(df)
63
64    return df

A partir de generator de gestor_oracle devuelve pandas.Dataframe

Arguments:
  • a_ora_generator:
  • columns_index:
  • columns: lista con nombre de columnas a mostrar
  • optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
  • params_df: parametros creación Dataframe
Returns:

pandas.Dataframe

@requires_oracle_deps
def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
67@requires_oracle_deps
68def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
69    """
70    A partir de generator de gestor_oracle devuelve geopandas.Dataframe
71
72    Args:
73        a_ora_generator:
74        column_geom:
75        crs (str=None): name EPSG crs
76        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
77
78    Returns:
79        geopandas.Dataframe
80    """
81    column_geom = column_geom.upper()
82    cols = params_df_for_sqlgen.pop('columns', [])
83    if cols:
84        params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom])
85
86    gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator,
87                                         **params_df_for_sqlgen),
88                           geometry=column_geom,
89                           crs=crs)
90
91    return gdf

A partir de generator de gestor_oracle devuelve geopandas.Dataframe

Arguments:
  • a_ora_generator:
  • column_geom:
  • crs (str=None): name EPSG crs
  • **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:

geopandas.Dataframe

@requires_oracle_deps
def cols_sql(cols, *l_cols_add):
 94@requires_oracle_deps
 95def cols_sql(cols, *l_cols_add):
 96    """
 97    Retorna lista de columnas
 98
 99    Args:
100        cols (list):
101        *l_cols_add: listas de columnas a añadir
102
103    Returns:
104        list
105    """
106    if cols:
107        cols = [c.upper() for c in cols]
108        for added_cols in l_cols_add:
109            cols += list(map(str.upper, added_cols))
110
111        return list(set(cols))

Retorna lista de columnas

Arguments:
  • cols (list):
  • *l_cols_add: listas de columnas a añadir
Returns:

list

@requires_oracle_deps
def df_table( gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
114@requires_oracle_deps
115def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
116    """
117    Devuelve pandas.DataFrame para una tablas de oracle
118
119    Args:
120        gest_ora:
121        nom_tab:
122        filter_sql:
123        *args_filter_sql:
124        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
125
126    Returns:
127        pandas.Dataframe
128    """
129    dd_tab = gest_ora.get_dd_table(nom_tab)
130    cols = params_df_for_sqlgen.get("columns", []).copy()
131    cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk())
132
133    df_tab = df_for_sqlgen(
134        gest_ora.generator_rows_sql(sql_tab(nom_tab,
135                                            filter_sql,
136                                            cols_sql(cols,
137                                                     cols_idx)),
138                                    *args_filter_sql,
139                                    geom_format="as_geojson"),
140        columns_index=cols_idx,
141        **params_df_for_sqlgen)
142
143    if not cols:
144        df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]]
145
146    return df_tab

Devuelve pandas.DataFrame para una tablas de oracle

Arguments:
  • gest_ora:
  • nom_tab:
  • filter_sql:
  • *args_filter_sql:
  • **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:

pandas.Dataframe

@requires_oracle_deps
def gdf_table( gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
149@requires_oracle_deps
150def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False,
151              filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
152    """
153    Devuelve geopandas.GeoDataframe para una tablas de oracle
154
155    Args:
156        gest_ora:
157        nom_tab:
158        column_geom:
159        other_cols_geom:
160        null_geoms:
161        filter_sql:
162        *args_filter_sql:
163        **params_gdf_for_sqlgen:
164
165    Returns:
166        geopandas.GeoDataframe
167    """
168    dd = gest_ora.get_dd_table(nom_tab)
169    column_geom = column_geom.upper()
170    cols = params_gdf_for_sqlgen.pop('columns', [])
171    if not other_cols_geom:
172        if not cols:
173            cols = [c for c in dd.alfas(include_pk=False)]
174        else:
175            other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()]
176            cols = [c for c in cols
177                    if c.upper() not in other_geoms]
178
179    cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk())
180
181    if not null_geoms:
182        not_null_geoms_sql = "{} is not null".format(column_geom)
183        if filter_sql:
184            filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql)
185        else:
186            filter_sql = not_null_geoms_sql
187
188    crs_epsg = None
189    if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \
190            (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)):
191        crs_epsg = f'EPSG:{epsg_code}'
192
193    gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab,
194                                                                 filter_sql,
195                                                                 cols_sql(cols,
196                                                                          cols_idx,
197                                                                          [column_geom])),
198                                                         *args_filter_sql,
199                                                         geom_format="as_shapely"),
200                             column_geom=column_geom,
201                             crs=crs_epsg,
202                             columns_index=cols_idx,
203                             columns=cols, **params_gdf_for_sqlgen)
204
205    if not cols:
206        gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]]
207
208    return gdf_tab

Devuelve geopandas.GeoDataframe para una tablas de oracle

Arguments:
  • gest_ora:
  • nom_tab:
  • column_geom:
  • other_cols_geom:
  • null_geoms:
  • filter_sql:
  • *args_filter_sql:
  • **params_gdf_for_sqlgen:
Returns:

geopandas.GeoDataframe

@requires_oracle_deps
def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
211@requires_oracle_deps
212def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
213    """
214    Retorna pandas.dtype de tipo categorico para la columna de una tabla
215
216    Args:
217        gest_ora:
218        nom_tab:
219        nom_col:
220
221    Returns:
222        pandas.CategoricalDtype
223    """
224    return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))

Retorna pandas.dtype de tipo categorico para la columna de una tabla

Arguments:
  • gest_ora:
  • nom_tab:
  • nom_col:
Returns:

pandas.CategoricalDtype