Source code for wolframclient.serializers.encoders.pandas

# -*- coding: utf-8 -*-

from __future__ import absolute_import, print_function, unicode_literals

from collections import OrderedDict
from itertools import starmap

from wolframclient.utils.api import pandas
from wolframclient.utils.dispatch import Dispatch

encoder = Dispatch()


[docs]def safe_pandas_length(o): """ Return the length of a pandas Series and DataFrame as expected for WL serialization. - The length of a Series is the only value of the tuple `shape`. - The length of a dataframe is the number of columns. It's the second value of `shape`. This function is safe, when the shape does not have the expected number of elements, it fails silently and returns `None`, the object is later traversed to find out how many elements it contains. """ try: return o.shape[-1] except (TypeError, IndexError): return
[docs]def encode_as_dataset(serializer, o, length): return serializer.serialize_function( serializer.serialize_symbol(b'Dataset'), (encode_as_association(serializer, o, length), ), )
[docs]def encode_as_list(serializer, o, length): return serializer.serialize_iterable( starmap(lambda k, v: serializer.serialize_rule(k, v), encoded_kv_tuples(serializer, o)), length=length)
[docs]def encode_as_association(serializer, o, length): return serializer.serialize_association( encoded_kv_tuples(serializer, o), length=length)
[docs]def encoded_kv_tuples(serializer, o): return ((serializer.encode(k), serializer.encode(v)) for k, v in o.items())
[docs]def encode_as_timeseries(serializer, o, length): return serializer.serialize_function( serializer.serialize_symbol(b'TimeSeries'), (serializer.serialize_iterable( (serializer.serialize_iterable(item, length=2) for item in encoded_kv_tuples(serializer, o)), length=length), ), )
def _distribute_multikey(o): expr_dict = OrderedDict() for multikey, value in o.iteritems(): cur_dict = expr_dict for key in multikey[:-1]: if key not in cur_dict: cur_dict[key] = OrderedDict() cur_dict = cur_dict[key] cur_dict[multikey[-1]] = value return expr_dict
[docs]def encode_multiindex_as_assoc(serializer, o, length): return serializer.encode(_distribute_multikey(o))
[docs]def encode_multiindex_as_dataset(serializer, o, length): return serializer.serialize_function( serializer.serialize_symbol(b'Dataset'), (serializer.encode(_distribute_multikey(o)), ))
PANDAS_PROPERTIES = { 'pandas_series_head': {'dataset', 'list', 'association'}, 'pandas_dataframe_head': {'dataset', 'association'}, 'timeseries': True, } ENCODERS = { 'default': { 'dataset': encode_as_dataset, 'list': encode_as_list, 'association': encode_as_association, }, 'datetimeindex': encode_as_timeseries, 'multiindex': { 'association': encode_multiindex_as_assoc, 'list': encode_multiindex_as_assoc, 'dataset': encode_multiindex_as_dataset }, }
[docs]def get_series_encoder_from_index(index, use_ts, form): if use_ts and isinstance(index, pandas.DatetimeIndex): return ENCODERS['datetimeindex'] elif isinstance(index, pandas.MultiIndex): return ENCODERS['multiindex'][form or 'dataset'] else: return ENCODERS['default'][form or 'association']
INVALID_PROPERTY_MSG = 'Invalid property %s, expecting %s'
[docs]def normalized_prop_timeseries(serializer): prop = serializer.get_property('timeseries', d=True) if not isinstance(prop, bool): raise ValueError( "Invalid value for property 'timeseries'. Expecting a boolean, got %s." % prop) return prop
[docs]def normalized_prop_pandas_series_head(serializer): """ Check property `pandas_series_head` only if specified (not None). """ prop = serializer.get_property('pandas_series_head', d=None) if prop and prop not in PANDAS_PROPERTIES['pandas_series_head']: raise ValueError( "Invalid value for property 'pandas_series_head'. Expecting one of (%s), got %s." % (', '.join(PANDAS_PROPERTIES['pandas_series_head']), prop)) return prop
[docs]@encoder.dispatch(pandas.Series) def encode_panda_series(serializer, o): use_ts = normalized_prop_timeseries(serializer) form = normalized_prop_pandas_series_head(serializer) encoder = get_series_encoder_from_index(o.index, use_ts, form) return encoder(serializer, o, safe_pandas_length(o))
[docs]def encode_dataframe_as_assoc(serializer, o, length): use_ts = normalized_prop_timeseries(serializer) return serializer.serialize_association( ((serializer.encode(k), get_series_encoder_from_index(v.index, use_ts, 'association')( serializer, v, safe_pandas_length(v))) for k, v in o.T.items()), length=length)
[docs]def encode_dataframe_as_dataset(serializer, o, length): return serializer.serialize_function( serializer.serialize_symbol(b'Dataset'), (encode_dataframe_as_assoc(serializer, o, length), ))
[docs]@encoder.dispatch(pandas.DataFrame) def encoder_panda_dataframe(serializer, o): head = serializer.get_property('pandas_dataframe_head', d=None) if head is None or head == 'dataset': return encode_dataframe_as_dataset(serializer, o, safe_pandas_length(o.index)) elif head in PANDAS_PROPERTIES['pandas_dataframe_head']: return encode_dataframe_as_assoc(serializer, o, safe_pandas_length(o.index)) else: raise ValueError( "Invalid value for property 'pandas_dataframe_head'. Expecting one of (%s), got %s." % (', '.join(PANDAS_PROPERTIES['pandas_dataframe_head']), prop))