# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, unicode_literals
import decimal
import re
from wolframclient.exception import WolframParserException
from wolframclient.language.expression import WLFunction, WLSymbol
from wolframclient.serializers.wxfencoder import constants
from wolframclient.serializers.wxfencoder.utils import array_to_list
from wolframclient.utils.api import numpy
# Names exported by `from <this module> import *`: the two consumer classes
# defined in this file.
__all__ = ["WXFConsumer", "WXFConsumerNumpy"]
class WXFConsumer(object):
    """Map WXF types to Python object generating functions.

    This class exposes a comprehensive list of methods consuming WXF types.
    Subclasses can override these members to implement custom parsing logic.

    Example implementing a consumer that maps any function with head
    :wl:`DirectedInfinity` to float('inf')::

        class ExampleConsumer(WXFConsumer):

            Infinity = wl.DirectedInfinity

            def build_function(self, head, arg_list, **kwargs):
                if head == self.Infinity:
                    return float('inf')
                else:
                    return super().build_function(head, arg_list, **kwargs)

    Test the new consumer::

        >>> wxf = export({'-inf': wl.DirectedInfinity(-1), '+inf': wl.DirectedInfinity(1)}, target_format='wxf')
        >>> binary_deserialize(wxf, consumer=ExampleConsumer())
        {'-inf': inf, '+inf': inf}

    Compare with default result::

        >>> binary_deserialize(wxf)
        {'-inf': DirectedInfinity[-1], '+inf': DirectedInfinity[1]}

    Once initialized, the entry point of a consumer is the method
    :func:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer.next_expression`.
    It takes a token generator and returns a Python object. This method is
    particularly useful when building nested expressions, e.g:
    :func:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer.build_function`,
    :func:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer.consume_association`,
    etc, in order to fetch sub-expressions.
    """

    # Dispatch table: WXF token type -> name of the method consuming it.
    # Method names (not bound methods) are stored so that the lookup goes
    # through `getattr(self, ...)` and picks up subclass overrides.
    _mapping = {
        constants.WXF_CONSTANTS.Function: "consume_function",
        constants.WXF_CONSTANTS.Symbol: "consume_symbol",
        constants.WXF_CONSTANTS.String: "consume_string",
        constants.WXF_CONSTANTS.BinaryString: "consume_binary_string",
        constants.WXF_CONSTANTS.Integer8: "consume_integer8",
        constants.WXF_CONSTANTS.Integer16: "consume_integer16",
        constants.WXF_CONSTANTS.Integer32: "consume_integer32",
        constants.WXF_CONSTANTS.Integer64: "consume_integer64",
        constants.WXF_CONSTANTS.Real64: "consume_real64",
        constants.WXF_CONSTANTS.BigInteger: "consume_bigint",
        constants.WXF_CONSTANTS.BigReal: "consume_bigreal",
        constants.WXF_CONSTANTS.PackedArray: "consume_packed_array",
        constants.WXF_CONSTANTS.NumericArray: "consume_numeric_array",
        constants.WXF_CONSTANTS.Association: "consume_association",
        constants.WXF_CONSTANTS.Rule: "consume_rule",
        constants.WXF_CONSTANTS.RuleDelayed: "consume_rule_delayed",
    }

    def next_expression(self, tokens, **kwargs):
        """Deserialize the next expression starting at the next token yielded by `tokens`.

        One token is pulled from `tokens` with :func:`next`, the consumer
        method registered for its ``wxf_type`` is looked up, and that method
        is called with the token, the remaining `tokens` and `kwargs`.

        Raises :class:`~wolframclient.exception.WolframParserException` when
        no consumer method is registered for the token type.
        """
        token = next(tokens)
        consumer = self._consumer_from_type(token.wxf_type)
        return consumer(token, tokens, **kwargs)

    def _consumer_from_type(self, wxf_type):
        """Return the bound method of this instance that consumes `wxf_type`.

        Raises :class:`~wolframclient.exception.WolframParserException` when
        `wxf_type` is not a key of ``_mapping``.
        """
        try:
            func = self._mapping[wxf_type]
        except KeyError:
            raise WolframParserException(
                "Class %s does not implement any consumer method for WXF token %s"
                % (self.__class__.__name__, wxf_type)
            )
        return getattr(self, func)

    # Head compared against in `consume_function` to detect lists.
    _LIST = WLSymbol("List")

    def consume_function(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *function*.

        The head is read first, followed by ``current_token.length`` arguments.
        Return the arguments as a :class:`tuple` if the head is symbol `List`,
        otherwise return the result of
        :func:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer.build_function`
        applied to the head and the tuple of arguments.

        Usually custom parsing rules target Functions, but not List. To do so,
        it is recommended to override
        :func:`~wolframclient.deserializers.wxf.wxfconsumer.WXFConsumer.build_function`.
        """
        head = self.next_expression(tokens, **kwargs)
        args = tuple(
            self.next_expression(tokens, **kwargs) for _ in range(current_token.length)
        )
        if head == self._LIST:
            return args
        return self.build_function(head, args, **kwargs)

    def build_function(self, head, arg_list, **kwargs):
        """Create a Python object from head and args.

        The default implementation returns
        :class:`~wolframclient.language.expression.WLFunction` built from
        `head` and the elements of `arg_list`; `kwargs` is ignored.

        This function can be conveniently overloaded to create specific
        Python objects from various heads. e.g: DateObject, Complex, etc.
        """
        return WLFunction(head, *arg_list)

    def consume_association(self, current_token, tokens, dict_class=dict, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *association*.

        ``current_token.length`` sub-expressions are read and passed, as a
        generator, to `dict_class`. By default, return a :class:`dict` made
        from the rules.

        The named option `dict_class` can be set to any type in which case an
        instance of :class:`dict_class` is returned.
        """
        # NOTE(review): `dict_class` is bound to the named parameter, so it is
        # not part of `kwargs` and is not forwarded to sub-expressions; an
        # association nested inside this one falls back to the default. Confirm
        # whether that is intended before changing it.
        return dict_class(
            self.next_expression(tokens, **kwargs) for _ in range(current_token.length)
        )

    def consume_rule(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *rule* as a tuple.

        The tuple holds the next two expressions, in the order they are read.
        """
        return (self.next_expression(tokens, **kwargs), self.next_expression(tokens, **kwargs))

    def consume_rule_delayed(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *rule delayed* as a tuple.

        The tuple holds the next two expressions, in the order they are read;
        the result has the same shape as the one of ``consume_rule``.
        """
        return (self.next_expression(tokens, **kwargs), self.next_expression(tokens, **kwargs))

    #: Symbol names deserialized as Python values instead of
    #: :class:`~wolframclient.language.expression.WLSymbol`.
    #: See documentation of :func:`~wolframclient.serializers.encoders.builtin.encode_none`
    #: for more information about the mapping of None and Null.
    BUILTIN_SYMBOL = {
        "True": True,
        "False": False,
        "Null": None,
        "Indeterminate": float("NaN"),
    }

    def consume_symbol(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *symbol*.

        Return the Python value registered in ``BUILTIN_SYMBOL`` for the symbol
        name, or a :class:`~wolframclient.language.expression.WLSymbol` when
        the name is not registered.
        """
        # A plain `.get()` cannot be used here: `None` is a legitimate mapped
        # value (symbol `Null`).
        try:
            return self.BUILTIN_SYMBOL[current_token.data]
        except KeyError:
            return WLSymbol(current_token.data)

    def consume_bigint(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *big integer* as a :class:`int`.

        Raises :class:`~wolframclient.exception.WolframParserException` when
        the token data cannot be converted by :func:`int`.
        """
        try:
            return int(current_token.data)
        except ValueError:
            raise WolframParserException("Invalid big integer value: %s" % current_token.data)

    # Groups: (1) mantissa, i.e. everything before the first backtick,
    # (2) optional backtick followed by a precision, (3) optional `*^`
    # exponent. The exponent accepts an explicit sign; without it a negative
    # exponent would not be captured and the mantissa alone would be returned.
    BIGREAL_RE = re.compile(r"([^`]+)(`[0-9.]+){0,1}(\*\^[+-]?[0-9]+){0,1}")

    def consume_bigreal(self, current_token, tokens, **kwargs):
        """Parse a WXF big real as a :class:`decimal.Decimal`.

        There is no such thing as a big real, in Wolfram Language notation, in
        Python. The token data is parsed with ``BIGREAL_RE`` rather than with
        `ToExpression`; introducing `ToExpression` would imply to marshall the
        big real data to avoid malicious code from being introduced in place
        of an actual real.

        The mantissa and the optional, possibly signed, exponent are combined
        into the returned :class:`decimal.Decimal`. The precision mark is
        matched but not used.

        Raises :class:`~wolframclient.exception.WolframParserException` when
        the token data does not match ``BIGREAL_RE``. A mantissa that is not a
        valid decimal literal makes the :class:`decimal.Decimal` constructor
        raise its own exception.
        """
        # `match` anchors at the start of the data only; trailing characters
        # that are not part of the pattern are ignored.
        match = self.BIGREAL_RE.match(current_token.data)
        if match:
            num, _prec, exp = match.groups()
            if exp:
                # Drop the leading `*^` and use the `e` notation of Decimal.
                return decimal.Decimal("%se%s" % (num, exp[2:]))
            return decimal.Decimal(num)
        raise WolframParserException("Invalid big real value: %s" % current_token.data)

    def consume_string(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *string*.

        The token data is returned as is.
        """
        return current_token.data

    def consume_binary_string(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *binary string*.

        The token data is returned as is.
        """
        return current_token.data

    def consume_integer8(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *integer* (8 bits).

        The token data is returned as is.
        """
        return current_token.data

    def consume_integer16(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *integer* (16 bits).

        The token data is returned as is.
        """
        return current_token.data

    def consume_integer32(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *integer* (32 bits).

        The token data is returned as is.
        """
        return current_token.data

    def consume_integer64(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *integer* (64 bits).

        The token data is returned as is.
        """
        return current_token.data

    def consume_real64(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *real*.

        The token data is returned as is.
        """
        return current_token.data

    def consume_numeric_array(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *numeric array*.

        The token data, dimensions and element type are handed to
        ``array_to_list``. This method returns a :class:`list`, and makes the
        assumption that the system is little endian.
        """
        return array_to_list(
            current_token.data,
            current_token.dimensions,
            constants.ARRAY_TYPES_FROM_WXF_TYPES[current_token.array_type],
        )

    def consume_packed_array(self, current_token, tokens, **kwargs):
        """Consume a :class:`~wolframclient.deserializers.wxf.wxfparser.WXFToken` of type *packed array*.

        The token data, dimensions and element type are handed to
        ``array_to_list``. This method returns a :class:`list`, and makes the
        assumption that the system is little endian.
        """
        return array_to_list(
            current_token.data,
            current_token.dimensions,
            constants.ARRAY_TYPES_FROM_WXF_TYPES[current_token.array_type],
        )
class WXFConsumerNumpy(WXFConsumer):
    """A WXF consumer that maps WXF array types to NumPy arrays."""

    #: NumPy dtype name used for each WXF array element type.
    # NOTE(review): these dtype names carry no explicit byte order, so the
    # buffer is presumably read in the byte order of the host; confirm this
    # matches the WXF payload on big-endian systems.
    WXF_TYPE_TO_DTYPE = {
        constants.ARRAY_TYPES.Integer8: "int8",
        constants.ARRAY_TYPES.Integer16: "int16",
        constants.ARRAY_TYPES.Integer32: "int32",
        constants.ARRAY_TYPES.Integer64: "int64",
        constants.ARRAY_TYPES.UnsignedInteger8: "uint8",
        constants.ARRAY_TYPES.UnsignedInteger16: "uint16",
        constants.ARRAY_TYPES.UnsignedInteger32: "uint32",
        constants.ARRAY_TYPES.UnsignedInteger64: "uint64",
        constants.ARRAY_TYPES.Real32: "float32",
        constants.ARRAY_TYPES.Real64: "float64",
        constants.ARRAY_TYPES.ComplexReal32: "complex64",
        constants.ARRAY_TYPES.ComplexReal64: "complex128",
    }

    def consume_numeric_array(self, current_token, tokens, **kwargs):
        """Build a NumPy array from a token of type *numeric array*.

        The token data is interpreted with ``numpy.frombuffer`` using the
        dtype registered in ``WXF_TYPE_TO_DTYPE`` for the token array type,
        then reshaped to the token dimensions.

        Raises :class:`KeyError` when the token array type is not a key of
        ``WXF_TYPE_TO_DTYPE``.
        """
        # The table is read through `self` so that a subclass can extend or
        # replace it, like the other lookup tables of the consumer classes.
        flat = numpy.frombuffer(
            current_token.data,
            dtype=self.WXF_TYPE_TO_DTYPE[current_token.array_type],
        )
        return numpy.reshape(flat, tuple(current_token.dimensions))

    def consume_packed_array(self, current_token, tokens, **kwargs):
        """Build a NumPy array from a token of type *packed array*.

        The array is built by ``consume_numeric_array`` and returned as a
        view of type ``numpy.PackedArray``.
        """
        arr = self.consume_numeric_array(current_token, tokens, **kwargs)
        # NOTE(review): `PackedArray` is resolved on the `numpy` object
        # imported from `wolframclient.utils.api`; presumably an ndarray
        # subclass exposed by that module - confirm.
        return arr.view(numpy.PackedArray)