apb_pandas_utils.oracle_pandas

  1#   coding=utf-8
  2#  #
  3#   Author: Ernesto Arredondo Martinez (ernestone@gmail.com)
  4#   File: oracle_pandas.py
  5#   Created: 05/04/2020, 17:41
  6#   Last modified: 10/11/2019, 11:24
  7#   Copyright (c) 2020
  8
  9import geopandas as gpd
 10import pandas as pd
 11from pandas.api.types import CategoricalDtype
 12
 13from apb_cx_oracle_spatial.gestor_oracle import sql_tab
 14from . import optimize_df
 15
 16
 17def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None,
 18                  optimize=False,
 19                  **params_df):
 20    """
 21    A partir de generator de gestor_oracle devuelve pandas.Dataframe
 22
 23    Args:
 24        a_ora_generator:
 25        columns_index:
 26        columns: lista con nombre de columnas a mostrar
 27        optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
 28        params_df: parametros creación Dataframe
 29
 30    Returns:
 31        pandas.Dataframe
 32    """
 33    df = pd.DataFrame(a_ora_generator, **params_df)
 34    if not df.empty and columns_index:
 35        df.set_index([c.upper() for c in columns_index], inplace=True)
 36
 37    if columns:
 38        df = df[[c.upper() for c in columns if c in df.columns]]
 39
 40    if optimize:
 41        df = optimize_df(df)
 42
 43    return df
 44
 45
 46def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
 47    """
 48    A partir de generator de gestor_oracle devuelve geopandas.Dataframe
 49
 50    Args:
 51        a_ora_generator:
 52        column_geom:
 53        crs (str=None): name EPSG crs
 54        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
 55
 56    Returns:
 57        geopandas.Dataframe
 58    """
 59    column_geom = column_geom.upper()
 60    cols = params_df_for_sqlgen.pop('columns', [])
 61    if cols:
 62        params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom])
 63
 64    gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator,
 65                                         **params_df_for_sqlgen),
 66                           geometry=column_geom,
 67                           crs=crs)
 68
 69    return gdf
 70
 71
 72def cols_sql(cols, *l_cols_add):
 73    """
 74    Retorna lista de columnas
 75
 76    Args:
 77        cols (list):
 78        *l_cols_add: listas de columnas a añadir
 79
 80    Returns:
 81        list
 82    """
 83    if cols:
 84        cols = [c.upper() for c in cols]
 85        for added_cols in l_cols_add:
 86            cols += list(map(str.upper, added_cols))
 87
 88        return list(set(cols))
 89
 90
 91def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
 92    """
 93    Devuelve pandas.DataFrame para una tablas de oracle
 94
 95    Args:
 96        gest_ora:
 97        nom_tab:
 98        filter_sql:
 99        *args_filter_sql:
100        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
101
102    Returns:
103        pandas.Dataframe
104    """
105    dd_tab = gest_ora.get_dd_table(nom_tab)
106    cols = params_df_for_sqlgen.get("columns", []).copy()
107    cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk())
108
109    df_tab = df_for_sqlgen(
110        gest_ora.generator_rows_sql(sql_tab(nom_tab,
111                                            filter_sql,
112                                            cols_sql(cols,
113                                                     cols_idx)),
114                                    *args_filter_sql,
115                                    geom_format="as_geojson"),
116        columns_index=cols_idx,
117        **params_df_for_sqlgen)
118
119    if not cols:
120        df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]]
121
122    return df_tab
123
124
125def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False,
126              filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
127    """
128    Devuelve geopandas.GeoDataframe para una tablas de oracle
129
130    Args:
131        gest_ora:
132        nom_tab:
133        column_geom:
134        other_cols_geom:
135        null_geoms:
136        filter_sql:
137        *args_filter_sql:
138        **params_gdf_for_sqlgen:
139
140    Returns:
141        geopandas.GeoDataframe
142    """
143    dd = gest_ora.get_dd_table(nom_tab)
144    column_geom = column_geom.upper()
145    cols = params_gdf_for_sqlgen.pop('columns', [])
146    if not other_cols_geom:
147        if not cols:
148            cols = [c for c in dd.alfas(include_pk=False)]
149        else:
150            other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()]
151            cols = [c for c in cols
152                    if c.upper() not in other_geoms]
153
154    cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk())
155
156    if not null_geoms:
157        not_null_geoms_sql = "{} is not null".format(column_geom)
158        if filter_sql:
159            filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql)
160        else:
161            filter_sql = not_null_geoms_sql
162
163    crs_epsg = None
164    if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \
165            (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)):
166        crs_epsg = f'EPSG:{epsg_code}'
167
168    gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab,
169                                                                 filter_sql,
170                                                                 cols_sql(cols,
171                                                                          cols_idx,
172                                                                          [column_geom])),
173                                                         *args_filter_sql,
174                                                         geom_format="as_shapely"),
175                             column_geom=column_geom,
176                             crs=crs_epsg,
177                             columns_index=cols_idx,
178                             columns=cols, **params_gdf_for_sqlgen)
179
180    if not cols:
181        gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]]
182
183    return gdf_tab
184
185
186def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
187    """
188    Retorna pandas.dtype de tipo categorico para la columna de una tabla
189
190    Args:
191        gest_ora:
192        nom_tab:
193        nom_col:
194
195    Returns:
196        pandas.CategoricalDtype
197    """
198    return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))
def df_for_sqlgen( a_ora_generator, columns_index=None, columns=None, optimize=False, **params_df):
18def df_for_sqlgen(a_ora_generator, columns_index=None, columns=None,
19                  optimize=False,
20                  **params_df):
21    """
22    A partir de generator de gestor_oracle devuelve pandas.Dataframe
23
24    Args:
25        a_ora_generator:
26        columns_index:
27        columns: lista con nombre de columnas a mostrar
28        optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
29        params_df: parametros creación Dataframe
30
31    Returns:
32        pandas.Dataframe
33    """
34    df = pd.DataFrame(a_ora_generator, **params_df)
35    if not df.empty and columns_index:
36        df.set_index([c.upper() for c in columns_index], inplace=True)
37
38    if columns:
39        df = df[[c.upper() for c in columns if c in df.columns]]
40
41    if optimize:
42        df = optimize_df(df)
43
44    return df

A partir de generator de gestor_oracle devuelve pandas.Dataframe

Arguments:
  • a_ora_generator:
  • columns_index:
  • columns: lista con nombre de columnas a mostrar
  • optimize: indica si se intentará optimizar el espacio ocupado por el Dataframe
  • params_df: parametros creación Dataframe
Returns:

pandas.Dataframe

def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
47def gdf_for_sqlgen(a_ora_generator, column_geom, crs=None, **params_df_for_sqlgen):
48    """
49    A partir de generator de gestor_oracle devuelve geopandas.Dataframe
50
51    Args:
52        a_ora_generator:
53        column_geom:
54        crs (str=None): name EPSG crs
55        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
56
57    Returns:
58        geopandas.Dataframe
59    """
60    column_geom = column_geom.upper()
61    cols = params_df_for_sqlgen.pop('columns', [])
62    if cols:
63        params_df_for_sqlgen['columns'] = cols_sql(cols, [column_geom])
64
65    gdf = gpd.GeoDataFrame(df_for_sqlgen(a_ora_generator,
66                                         **params_df_for_sqlgen),
67                           geometry=column_geom,
68                           crs=crs)
69
70    return gdf

A partir de generator de gestor_oracle devuelve geopandas.Dataframe

Arguments:
  • a_ora_generator:
  • column_geom:
  • crs (str=None): name EPSG crs
  • **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:

geopandas.Dataframe

def cols_sql(cols, *l_cols_add):
73def cols_sql(cols, *l_cols_add):
74    """
75    Retorna lista de columnas
76
77    Args:
78        cols (list):
79        *l_cols_add: listas de columnas a añadir
80
81    Returns:
82        list
83    """
84    if cols:
85        cols = [c.upper() for c in cols]
86        for added_cols in l_cols_add:
87            cols += list(map(str.upper, added_cols))
88
89        return list(set(cols))

Retorna lista de columnas

Arguments:
  • cols (list):
  • *l_cols_add: listas de columnas a añadir
Returns:

list

def df_table( gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
 92def df_table(gest_ora, nom_tab, filter_sql=None, *args_filter_sql, **params_df_for_sqlgen):
 93    """
 94    Devuelve pandas.DataFrame para una tablas de oracle
 95
 96    Args:
 97        gest_ora:
 98        nom_tab:
 99        filter_sql:
100        *args_filter_sql:
101        **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
102
103    Returns:
104        pandas.Dataframe
105    """
106    dd_tab = gest_ora.get_dd_table(nom_tab)
107    cols = params_df_for_sqlgen.get("columns", []).copy()
108    cols_idx = params_df_for_sqlgen.pop('columns_index', dd_tab.pk())
109
110    df_tab = df_for_sqlgen(
111        gest_ora.generator_rows_sql(sql_tab(nom_tab,
112                                            filter_sql,
113                                            cols_sql(cols,
114                                                     cols_idx)),
115                                    *args_filter_sql,
116                                    geom_format="as_geojson"),
117        columns_index=cols_idx,
118        **params_df_for_sqlgen)
119
120    if not cols:
121        df_tab = df_tab[[c for c in dd_tab.cols if c in df_tab.columns]]
122
123    return df_tab

Devuelve pandas.DataFrame para una tablas de oracle

Arguments:
  • gest_ora:
  • nom_tab:
  • filter_sql:
  • *args_filter_sql:
  • **params_df_for_sqlgen: parametros opcionales funcion df_for_sqlgen()
Returns:

pandas.Dataframe

def gdf_table( gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False, filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
126def gdf_table(gest_ora, nom_tab, column_geom, other_cols_geom=False, null_geoms=False,
127              filter_sql=None, *args_filter_sql, **params_gdf_for_sqlgen):
128    """
129    Devuelve geopandas.GeoDataframe para una tablas de oracle
130
131    Args:
132        gest_ora:
133        nom_tab:
134        column_geom:
135        other_cols_geom:
136        null_geoms:
137        filter_sql:
138        *args_filter_sql:
139        **params_gdf_for_sqlgen:
140
141    Returns:
142        geopandas.GeoDataframe
143    """
144    dd = gest_ora.get_dd_table(nom_tab)
145    column_geom = column_geom.upper()
146    cols = params_gdf_for_sqlgen.pop('columns', [])
147    if not other_cols_geom:
148        if not cols:
149            cols = [c for c in dd.alfas(include_pk=False)]
150        else:
151            other_geoms = [g for g in dd.geoms().keys() if g != column_geom.upper()]
152            cols = [c for c in cols
153                    if c.upper() not in other_geoms]
154
155    cols_idx = params_gdf_for_sqlgen.pop('columns_index', dd.pk())
156
157    if not null_geoms:
158        not_null_geoms_sql = "{} is not null".format(column_geom)
159        if filter_sql:
160            filter_sql = "{} and ({})".format(not_null_geoms_sql, filter_sql)
161        else:
162            filter_sql = not_null_geoms_sql
163
164    crs_epsg = None
165    if (tip_geom := gest_ora.get_tip_camp_geom(nom_tab, column_geom)) and \
166            (epsg_code := gest_ora.get_epsg_for_srid(tip_geom.SRID)):
167        crs_epsg = f'EPSG:{epsg_code}'
168
169    gdf_tab = gdf_for_sqlgen(gest_ora.generator_rows_sql(sql_tab(nom_tab,
170                                                                 filter_sql,
171                                                                 cols_sql(cols,
172                                                                          cols_idx,
173                                                                          [column_geom])),
174                                                         *args_filter_sql,
175                                                         geom_format="as_shapely"),
176                             column_geom=column_geom,
177                             crs=crs_epsg,
178                             columns_index=cols_idx,
179                             columns=cols, **params_gdf_for_sqlgen)
180
181    if not cols:
182        gdf_tab = gdf_tab[[c for c in dd.cols if c in gdf_tab.columns]]
183
184    return gdf_tab

Devuelve geopandas.GeoDataframe para una tablas de oracle

Arguments:
  • gest_ora:
  • nom_tab:
  • column_geom:
  • other_cols_geom:
  • null_geoms:
  • filter_sql:
  • *args_filter_sql:
  • **params_gdf_for_sqlgen:
Returns:

geopandas.GeoDataframe

def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
187def dtype_categorical_from_tab_column(gest_ora, nom_tab, nom_col):
188    """
189    Retorna pandas.dtype de tipo categorico para la columna de una tabla
190
191    Args:
192        gest_ora:
193        nom_tab:
194        nom_col:
195
196    Returns:
197        pandas.CategoricalDtype
198    """
199    return CategoricalDtype(categories=gest_ora.iter_distinct_vals_camp_tab(nom_tab, nom_col))

Retorna pandas.dtype de tipo categorico para la columna de una tabla

Arguments:
  • gest_ora:
  • nom_tab:
  • nom_col:
Returns:

pandas.CategoricalDtype