0001"""
0002Implementation of JSONEncoder
0003"""
0004import re
0005try:
0006 from simplejson import _speedups
0007except ImportError:
0008 _speedups = None
0009
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: every control character U+0000..U+001F, the backslash and the
# double quote.  (The upper bound used to be \x19, which let U+001A..U+001F
# through unescaped even though ESCAPE_DCT has replacements for them.)
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters escaped when ASCII-only output is requested: backslash, double
# quote, forward slash, and anything outside the printable range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
# Replacement text for each character that has a fixed escape sequence.
ESCAPE_DCT = {
    # Only ESCAPE_ASCII matches '/', so this entry is used solely by the
    # ASCII encoder.
    '/': '\\/',
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Remaining control characters have no short form: fall back to \u00XX.
# setdefault keeps the short escapes defined above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
0025
0026
# The literal is far beyond the range of a double; float() is relied upon to
# turn it into positive infinity (NOTE(review): presumably true on every
# supported platform -- confirm).
INFINITY = float('1e66666')


def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are rendered with ``str(o)``.  NaN and the two infinities
    are rendered as ``NaN``, ``Infinity`` and ``-Infinity`` when
    ``allow_nan`` is true; otherwise a ``ValueError`` is raised for them.
    """
    # The special values are detected by comparison only: NaN is the one
    # float that is not equal to itself, and the infinities compare equal to
    # the INFINITY constant.
    if o != o:
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'
    else:
        special = None

    if special is None:
        return str(o)
    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
0047
0048
def encode_basestring(s):
    """
    Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged and
    the result is wrapped in double quotes.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
0056
def encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string.

    Characters matched by ``ESCAPE_ASCII`` are replaced either by their
    fixed escape from ``ESCAPE_DCT`` or by ``\\uXXXX`` sequences; the result
    is wrapped in double quotes and passed through ``str()``.
    """
    def _escape(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Code points above U+FFFF do not fit in four hex digits, so they
        # are written as a high/low surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(_escape, s)) + '"'
0073
# Prefer the encoder from the optional _speedups extension when it exists.
# _need_utf8 records which implementation is active: the encoder methods
# skip decoding 'utf-8' byte strings when it is true (presumably because the
# extension accepts UTF-8 bytes directly -- confirm against _speedups).
try:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
except AttributeError:
    # _speedups is None (import failed) or does not provide the function:
    # the pure-Python encode_basestring_ascii defined above stays in place.
    _need_utf8 = False
else:
    _need_utf8 = True
0079
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8'):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, unicode, int, long, float,
        bool or None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, non-ASCII characters are passed through
        unescaped.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent infinite recursion; a ValueError is raised when one is
        found.  Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth of the container currently being written.  It lives
        # on the instance and is changed while output is generated, so two
        # interleaved iterencode() runs on one encoder would share it.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current depth."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the chunks of the JSON array for ``lst``.

        ``markers`` maps ``id()`` to the containers currently being encoded;
        when it is not None, meeting ``lst`` again raises ``ValueError``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the chunks of the JSON object for ``dct``.

        Keys are converted to strings: str/unicode are used as they are
        (str is decoded first when required), True/False/None become
        ``true``/``false``/``null``, and numbers use their text form.  Any
        other key is skipped when ``skipkeys`` is set and raises
        ``TypeError`` otherwise.  ``markers`` is used for circular reference
        detection exactly as in ``_iterencode_list``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # Byte strings are decoded up front unless the active encoder is the
        # speedups one and the configured encoding is 'utf-8'.
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # bool is a subclass of int, so the True/False singletons must
            # be tested before the generic integer branch; otherwise they
            # would be written as "True"/"False" via str().
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON chunks for an arbitrary object ``o``.

        Dispatches on the type of ``o``; objects of unsupported types are
        converted through ``default()`` and the result is encoded in turn.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are handled before the int branch because
        # bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Track the custom object as well, so a default() that keeps
            # returning something containing ``o`` is reported as circular.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``default()`` and return an iterator over its chunks."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # Fast path for a bare string: no generator machinery is needed.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            # Honour ensure_ascii here too, so that encode() and
            # iterencode() produce the same text for a top-level string.
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
0370
# Names exported by a star-import of this module: only the encoder class.
__all__ = ['JSONEncoder']