/Users/bob/src/simplejson/simplejson/encoder.py

0001"""

0002Implementation of JSONEncoder

0003"""

0004import re

0005try:

0006    from simplejson import _speedups

0007except ImportError:

0008    _speedups = None

0009

# Characters that encode_basestring() must escape: every ASCII control
# character (U+0000 through U+001F), the backslash and the double quote.
# The range has to run all the way to \x1f, because JSON does not allow
# any raw control character inside a string literal.  Note that '/' is
# not matched here, so only the ASCII encoder applies the '/' escape.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')

# Characters that encode_basestring_ascii() must escape: backslash, double
# quote, forward slash, and anything outside the printable ASCII range
# (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')

# Maps a character to its JSON escape sequence.  The short forms are
# listed explicitly; the loop below fills in \uXXXX escapes for the
# remaining control characters.
ESCAPE_DCT = {
    # escape all forward slashes to prevent </script> attack
    '/': '\\/',
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault() keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# assume this produces an infinity on all machines (probably not guaranteed):
# the literal is far outside the range of a double, so parsing it is
# expected to overflow to positive infinity.
INFINITY = float('1e66666')

0028

def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are rendered with ``repr()`` so that the text converts
    back to the same float.  NaN and the two infinities are rendered as
    the JavaScript literals ``NaN``, ``Infinity`` and ``-Infinity``.

    Raises ``ValueError`` when ``o`` is NaN or infinite and ``allow_nan``
    is false, since those values are not valid JSON.
    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.

    if o != o:
        # NaN is the only float that is not equal to itself.
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        # repr() rather than str(): on Python 2, str() rounds a float to
        # 12 significant digits, which silently loses precision.
        return repr(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    return text

0047

0048

def encode_basestring(s):
    """
    Return a JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.
    Characters that ``ESCAPE`` does not match are passed through as-is.
    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'

0056

def encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string.

    Every character matched by ``ESCAPE_ASCII`` is replaced: by its
    ``ESCAPE_DCT`` entry when it has one, otherwise by a ``\\uXXXX``
    escape (or a surrogate pair of two such escapes for code points at
    or above 0x10000).  The result is wrapped in double quotes.
    """
    def replace(match):
        char = match.group(0)
        escaped = ESCAPE_DCT.get(char)
        if escaped is not None:
            return escaped
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # surrogate pair
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    body = ESCAPE_ASCII.sub(replace, s)
    # Everything left after substitution is printable ASCII, so the
    # conversion to str is safe.
    return '"' + str(body) + '"'

0073

# Prefer the encode_basestring_ascii from the optional _speedups module
# when it is available.  _speedups is None when the import failed, and
# looking up an attribute on None raises AttributeError as well, so both
# "module missing" and "function missing" land in the except branch.
#
# _need_utf8 records which implementation is active: when it is true the
# encoder skips decoding byte strings whose encoding is 'utf-8' before
# handing them to encode_basestring_ascii.
try:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
except AttributeError:
    _need_utf8 = False
else:
    _need_utf8 = True

0079

class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    # Class-level defaults; __init__ overrides them per instance when the
    # ``separators`` argument is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8'):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output may contain unicode objects.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would otherwise exhaust the
        interpreter's recursion limit).  Otherwise, no such check takes
        place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple. The default is (', ', ': '). To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Current nesting depth for pretty-printing.  It is instance state
        # that the list/dict generators adjust while they run.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current
        nesting depth.  Only called when ``self.indent`` is not None.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON chunks for the list or tuple ``lst``.

        ``markers`` is the dict of ids of containers currently being
        encoded (or None when circular checking is off); a repeated id
        raises ``ValueError``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Finished with this container; it may legitimately appear
            # again elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON chunks for the dict ``dct``.

        Keys that are strings are used directly (byte strings are decoded
        first when required); float, bool, int/long and None keys are
        converted to their JSON text and emitted as strings.  Any other
        key is skipped when ``self.skipkeys`` is true and raises
        ``TypeError`` otherwise.

        ``markers`` has the same meaning as in ``_iterencode_list``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = sorted(dct.keys())
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # Byte-string keys are decoded unless no encoding is configured or
        # the active encode_basestring_ascii accepts UTF-8 input itself.
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # bool is a subclass of int, so True and False have to be
            # tested before the integer branch; otherwise they would be
            # rendered as "True"/"False" instead of "true"/"false".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif key is None:
                key = 'null'
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for an arbitrary object ``o``, dispatching
        on its type.  Objects of unsupported types are passed through
        ``default()`` and the result is encoded in their place.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are tested before int because bool is a
        # subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``default()`` and return an iterator over the
        JSON chunks of the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks...
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            # Honour ensure_ascii here exactly as _iterencode() does, so
            # a top-level string is encoded the same way as a nested one.
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because it
        # sucks at reporting exceptions.  It's going to do this internally
        # anyway because it uses PySequence_Fast or similar.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)

0370

0371__all__ = ['JSONEncoder']