Initial commit

* Examples work
* setup.py kinda updated
* Fork of txmongo but with new pymongo embedded
This commit is contained in:
Don Brown
2014-01-24 16:19:17 -07:00
commit ae154dbc72
60 changed files with 14825 additions and 0 deletions

View File

@@ -0,0 +1,616 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""BSON (Binary JSON) encoding and decoding.
"""
import calendar
import datetime
import re
import struct
import sys
from asyncio_mongo._bson.binary import (Binary, OLD_UUID_SUBTYPE,
JAVA_LEGACY, CSHARP_LEGACY)
from asyncio_mongo._bson.code import Code
from asyncio_mongo._bson.dbref import DBRef
from asyncio_mongo._bson.errors import (InvalidBSON,
InvalidDocument,
InvalidStringData)
from asyncio_mongo._bson.max_key import MaxKey
from asyncio_mongo._bson.min_key import MinKey
from asyncio_mongo._bson.objectid import ObjectId
from asyncio_mongo._bson.py3compat import b, binary_type
from asyncio_mongo._bson.son import SON, RE_TYPE
from asyncio_mongo._bson.timestamp import Timestamp
from asyncio_mongo._bson.tz_util import utc
# Optional C accelerator: when the compiled extension cannot be imported the
# pure-Python encoder/decoder defined in this module is used instead.
try:
    from asyncio_mongo._bson import _cbson
except ImportError:
    _use_c = False
else:
    _use_c = True

# UUID support is gated on the uuid module being importable.
try:
    import uuid
except ImportError:
    _use_uuid = False
else:
    _use_uuid = True
# True when the running interpreter's major version is 3.
PY3 = sys.version_info[0] == 3
# Inclusive bounds of the signed 32-bit and 64-bit integer ranges; the encoder
# uses them to choose between the int32 and int64 element types and to reject
# values that fit in neither.
MAX_INT32 = 2147483647
MIN_INT32 = -2147483648
MAX_INT64 = 9223372036854775807
MIN_INT64 = -9223372036854775808
# Timestamp 0 as a timezone-aware (UTC) and as a naive datetime; decoded
# datetimes are built by adding an offset to one of these.
EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
# Byte-string constants, all built through the py3compat ``b()`` helper so
# they can be concatenated with and compared against encoded BSON data.
EMPTY = b("")
ZERO = b("\x00")
ONE = b("\x01")
# One-byte BSON element type markers.
BSONNUM = b("\x01") # Floating point
BSONSTR = b("\x02") # UTF-8 string
BSONOBJ = b("\x03") # Embedded document
BSONARR = b("\x04") # Array
BSONBIN = b("\x05") # Binary
BSONUND = b("\x06") # Undefined
BSONOID = b("\x07") # ObjectId
BSONBOO = b("\x08") # Boolean
BSONDAT = b("\x09") # UTC Datetime
BSONNUL = b("\x0A") # Null
BSONRGX = b("\x0B") # Regex
BSONREF = b("\x0C") # DBRef
BSONCOD = b("\x0D") # Javascript code
BSONSYM = b("\x0E") # Symbol
BSONCWS = b("\x0F") # Javascript code with scope
BSONINT = b("\x10") # 32bit int
BSONTIM = b("\x11") # Timestamp
BSONLON = b("\x12") # 64bit int
BSONMIN = b("\xFF") # Min key
BSONMAX = b("\x7F") # Max key
def _get_int(data, position, as_class=None,
             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
    """Decode a little-endian 32-bit integer found at `position`.

    Raises :class:`InvalidBSON` when the four bytes cannot be unpacked.

    :Parameters:
      - `data`: the bytes being decoded
      - `position`: offset of the first byte of the integer
      - `as_class`, `tz_aware`, `uuid_subtype`: accepted for the shared
        decoder signature and not used here
      - `unsigned` (optional): decode as unsigned when true

    Returns a ``(value, new_position)`` tuple.
    """
    fmt = "<I" if unsigned else "<i"
    end = position + 4
    try:
        (value,) = struct.unpack(fmt, data[position:end])
    except struct.error:
        raise InvalidBSON()
    return value, end
def _get_c_string(data, position, length=None):
    """Decode a UTF-8 C string starting at `position`.

    When `length` is given the string spans exactly that many bytes;
    otherwise it runs up to the next NUL byte, and :class:`InvalidBSON` is
    raised if there is none.

    Returns ``(text, new_position)`` where `new_position` is one past the
    byte that follows the string (the terminator).
    """
    if length is not None:
        end = position + length
    else:
        try:
            end = data.index(ZERO, position)
        except ValueError:
            raise InvalidBSON()
    text = data[position:end].decode("utf-8")
    return text, end + 1
def _make_c_string(string, check_null=False):
    """Return `string` as NUL-terminated UTF-8 bytes.

    `string` may be a :class:`str`, which is encoded, or a byte string,
    which must already be valid UTF-8.

    Raises :class:`InvalidDocument` if `check_null` is true and the input
    contains a NUL character, and :class:`InvalidStringData` if a byte
    string is not valid UTF-8.
    """
    if isinstance(string, str):
        if check_null and "\x00" in string:
            raise InvalidDocument("BSON keys / regex patterns must not "
                                  "contain a NULL character")
        return string.encode("utf-8") + ZERO

    if check_null and ZERO in string:
        raise InvalidDocument("BSON keys / regex patterns must not "
                              "contain a NULL character")
    try:
        # Decoding is only a validity check; the original bytes are kept.
        string.decode("utf-8")
    except UnicodeError:
        raise InvalidStringData("strings in documents must be valid "
                                "UTF-8: %r" % string)
    return string + ZERO
def _get_number(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a little-endian 64-bit float found at `position`.

    `as_class`, `tz_aware` and `uuid_subtype` belong to the shared decoder
    signature and are not used. Returns ``(value, new_position)``.
    """
    end = position + 8
    (number,) = struct.unpack("<d", data[position:end])
    return number, end
def _get_string(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a length-prefixed, NUL-terminated UTF-8 string.

    The 32-bit length prefix counts the trailing NUL, so the smallest valid
    value is 1.

    Raises :class:`InvalidBSON` if the declared length is not positive,
    runs past the end of `data`, or the string is not NUL-terminated.

    `as_class`, `tz_aware` and `uuid_subtype` belong to the shared decoder
    signature and are not used. Returns ``(text, new_position)``.
    """
    length = struct.unpack("<i", data[position:position + 4])[0]
    # A zero or negative length used to slip through: the terminator check
    # below then inspected a byte *before* the string, and a negative length
    # moved the returned position backwards.
    if length <= 0 or (len(data) - position - 4) < length:
        raise InvalidBSON("invalid string length")
    position += 4
    if data[position + length - 1:position + length] != ZERO:
        raise InvalidBSON("invalid end of string")
    return _get_c_string(data, position, length - 1)
def _get_object(data, position, as_class, tz_aware, uuid_subtype):
    """Decode an embedded document starting at `position`.

    A decoded document that contains a ``"$ref"`` key is returned as a
    :class:`DBRef`; its ``"$ref"``, ``"$id"`` and ``"$db"`` entries are
    popped and the remaining entries become the DBRef's extra fields.

    Raises :class:`InvalidBSON` if the declared size is below the five-byte
    minimum (int32 size plus terminator) or the document does not end with
    a NUL byte.

    Returns ``(document_or_dbref, new_position)``.
    """
    obj_size = struct.unpack("<i", data[position:position + 4])[0]
    # Without this check a negative size can move the cursor backwards, so
    # the caller's element loop re-reads the same bytes forever.
    if obj_size < 5:
        raise InvalidBSON("invalid object size")
    end = position + obj_size
    if data[end - 1:end] != ZERO:
        raise InvalidBSON("bad eoo")
    encoded = data[position + 4:end - 1]
    document = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
    if "$ref" in document:
        return (DBRef(document.pop("$ref"), document.pop("$id", None),
                      document.pop("$db", None), document), end)
    return document, end
def _get_array(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a BSON array into a list.

    The array is first decoded as a document, then the values stored under
    the keys ``"0"``, ``"1"``, ... are collected in order until the first
    missing key.

    Returns ``(values, new_position)``.
    """
    doc, position = _get_object(data, position,
                                as_class, tz_aware, uuid_subtype)
    values = []
    try:
        while True:
            # The next expected key is the number of items gathered so far.
            values.append(doc[str(len(values))])
    except KeyError:
        pass
    return values, position
def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a BSON binary element.

    Subtype 2 carries a second, inner length that must equal the outer
    length minus four. Subtypes 3 and 4 are returned as :class:`uuid.UUID`
    when the uuid module is available, with the byte order chosen by
    `uuid_subtype` (``JAVA_LEGACY``, ``CSHARP_LEGACY`` or the plain byte
    order otherwise). Under Python 3 subtype 0 is returned as plain bytes;
    everything else becomes a :class:`Binary`.

    Raises :class:`InvalidBSON` if the lengths are inconsistent, negative,
    or extend past the end of `data`.

    Returns ``(value, new_position)``.
    """
    length, position = _get_int(data, position)
    subtype = ord(data[position:position + 1])
    position += 1
    if subtype == 2:
        length2, position = _get_int(data, position)
        if length2 != length - 4:
            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
        length = length2

    end = position + length
    # A negative length would move the cursor backwards and an over-long one
    # would silently yield a truncated payload; reject both.
    if length < 0 or end > len(data):
        raise InvalidBSON("bad binary object length")
    payload = data[position:end]

    if subtype in (3, 4) and _use_uuid:
        if uuid_subtype == JAVA_LEGACY:
            # Java legacy: each 8-byte half is stored reversed.
            value = uuid.UUID(bytes=payload[0:8][::-1] + payload[8:16][::-1])
        elif uuid_subtype == CSHARP_LEGACY:
            # C# legacy: little-endian field layout.
            value = uuid.UUID(bytes_le=payload)
        else:
            value = uuid.UUID(bytes=payload)
        return value, end

    # Python3 special case. Decode subtype 0 to 'bytes'.
    if PY3 and subtype == 0:
        return payload, end
    return Binary(payload, subtype), end
def _get_oid(data, position, as_class=None,
             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
    """Decode the 12 bytes at `position` into an :class:`ObjectId`.

    The remaining parameters belong to the shared decoder signature and are
    not used. Returns ``(oid, new_position)``.
    """
    end = position + 12
    return ObjectId(data[position:end]), end
def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a one-byte boolean: true only when the byte equals ``ONE``.

    The remaining parameters belong to the shared decoder signature and are
    not used. Returns ``(flag, new_position)``.
    """
    end = position + 1
    return data[position:end] == ONE, end
def _get_date(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a 64-bit millisecond timestamp into a datetime.

    The value is a signed count of milliseconds relative to the epoch. The
    result is timezone-aware (based on ``EPOCH_AWARE``) when `tz_aware` is
    true and naive (based on ``EPOCH_NAIVE``) otherwise.

    `as_class` and `uuid_subtype` belong to the shared decoder signature
    and are not used. Returns ``(datetime, new_position)``.
    """
    millis = struct.unpack("<q", data[position:position + 8])[0]
    # divmod floors, so the remainder is always in [0, 1000) even for
    # pre-epoch values, and the seconds stay an exact int. The previous
    # ``/`` was true division under Python 3 and went through a float.
    seconds, remainder_ms = divmod(millis, 1000)
    position += 8
    epoch = EPOCH_AWARE if tz_aware else EPOCH_NAIVE
    dt = epoch + datetime.timedelta(seconds=seconds)
    return dt.replace(microsecond=remainder_ms * 1000), position
def _get_code(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a JavaScript code element (no scope) into a :class:`Code`.

    The payload is read with the same layout as a BSON string.
    Returns ``(code, new_position)``.
    """
    source, position = _get_string(data, position,
                                   as_class, tz_aware, uuid_subtype)
    return Code(source), position
def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a JavaScript-code-with-scope element into a :class:`Code`.

    The element is a 32-bit integer (read and discarded), the code string,
    and then the scope document.

    Returns ``(code, new_position)``.
    """
    # The leading integer is consumed but its value is not used.
    _unused, position = _get_int(data, position)
    source, position = _get_string(data, position,
                                   as_class, tz_aware, uuid_subtype)
    scope_doc, position = _get_object(data, position,
                                      as_class, tz_aware, uuid_subtype)
    return Code(source, scope_doc), position
def _get_null(data, position, as_class, tz_aware, uuid_subtype):
    """Decode an element that has no payload.

    Nothing is read, so `position` is returned unchanged together with
    ``None``.
    """
    return (None, position)
def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a BSON regular expression into a compiled pattern.

    The element is two consecutive C strings: the pattern and a string of
    single-letter flags, each of which maps to one :mod:`re` flag.

    Returns ``(compiled_pattern, new_position)``.
    """
    pattern, position = _get_c_string(data, position)
    bson_flags, position = _get_c_string(data, position)
    letter_to_flag = (
        ("i", re.IGNORECASE),
        ("l", re.LOCALE),
        ("m", re.MULTILINE),
        ("s", re.DOTALL),
        ("u", re.UNICODE),
        ("x", re.VERBOSE),
    )
    flags = 0
    for letter, flag in letter_to_flag:
        if letter in bson_flags:
            flags |= flag
    return re.compile(pattern, flags), position
def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a DBRef-typed element into a :class:`DBRef`.

    The first four bytes are skipped without being read, then the
    collection name is read as a C string followed by a 12-byte ObjectId.

    Returns ``(dbref, new_position)``.
    """
    name_start = position + 4
    collection, oid_start = _get_c_string(data, name_start)
    oid, end = _get_oid(data, oid_start)
    return DBRef(collection, oid), end
def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a BSON timestamp into a :class:`Timestamp`.

    Two unsigned 32-bit integers are read: the increment first, then the
    time value.

    Returns ``(timestamp, new_position)``.
    """
    increment, position = _get_int(data, position, unsigned=True)
    seconds, position = _get_int(data, position, unsigned=True)
    return Timestamp(seconds, increment), position
def _get_long(data, position, as_class, tz_aware, uuid_subtype):
    """Decode a little-endian signed 64-bit integer found at `position`.

    The remaining parameters belong to the shared decoder signature and are
    not used. Returns ``(value, new_position)``.
    """
    end = position + 8
    (raw,) = struct.unpack("<q", data[position:end])
    # The int() call is kept from the original code, where it stood in for
    # a cast to ``long``.
    return int(raw), end
# Dispatch table from a one-byte BSON type marker to its decoder. Every
# decoder is called as (data, position, as_class, tz_aware, uuid_subtype)
# and returns (value, new_position).
_element_getter = {
    BSONNUM: _get_number,
    BSONSTR: _get_string,
    BSONOBJ: _get_object,
    BSONARR: _get_array,
    BSONBIN: _get_binary,
    BSONUND: _get_null,  # undefined decodes to None
    BSONOID: _get_oid,
    BSONBOO: _get_boolean,
    BSONDAT: _get_date,
    BSONNUL: _get_null,
    BSONRGX: _get_regex,
    BSONREF: _get_ref,
    BSONCOD: _get_code,  # code
    BSONSYM: _get_string,  # symbol decodes like a string
    BSONCWS: _get_code_w_scope,
    BSONINT: _get_int,  # number_int
    BSONTIM: _get_timestamp,
    BSONLON: _get_long,
    # Min/max keys have no payload, so the position is returned unchanged.
    BSONMIN: lambda data, position, as_class, tz_aware, uuid_subtype: (
        MinKey(), position),
    BSONMAX: lambda data, position, as_class, tz_aware, uuid_subtype: (
        MaxKey(), position),
}
def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
    """Decode the single element that starts at `position`.

    An element is a one-byte type marker, the key as a C string, and a
    payload decoded by the matching entry of ``_element_getter``.

    Returns ``(key, value, new_position)``.
    """
    type_end = position + 1
    element_type = data[position:type_end]
    element_name, value_start = _get_c_string(data, type_end)
    decoder = _element_getter[element_type]
    value, position = decoder(data, value_start, as_class,
                              tz_aware, uuid_subtype)
    return element_name, value, position
def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
    """Decode a run of encoded elements into a new `as_class` instance.

    `data` holds only the elements of a document (no size prefix and no
    terminator). Elements are decoded one after another and stored with
    item assignment, so a repeated key keeps its last value.
    """
    document = as_class()
    cursor = 0
    last = len(data) - 1
    while cursor < last:
        key, value, cursor = _element_to_dict(data, cursor, as_class,
                                              tz_aware, uuid_subtype)
        document[key] = value
    return document
def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
    """Decode one complete BSON document.

    `data` must be exactly one document: the declared size has to equal
    ``len(data)`` and the final byte has to be NUL, otherwise
    :class:`InvalidBSON` is raised.

    Returns ``(document, remainder)`` where `remainder` is whatever follows
    the document in `data`.
    """
    (obj_size,) = struct.unpack("<i", data[:4])
    total = len(data)
    if total < obj_size:
        raise InvalidBSON("objsize too large")
    if obj_size != total or data[obj_size - 1:obj_size] != ZERO:
        raise InvalidBSON("bad eoo")
    document = _elements_to_dict(data[4:obj_size - 1], as_class,
                                 tz_aware, uuid_subtype)
    return document, data[obj_size:]
# When the C extension was imported, its decoder replaces the pure-Python
# _bson_to_dict for every later lookup of that name.
if _use_c:
    _bson_to_dict = _cbson._bson_to_dict
def _element_to_bson(key, value, check_keys, uuid_subtype):
    """Encode one key/value pair as a BSON element.

    The result is the type marker, the key as a C string, and a payload
    that depends on the Python type of `value`. The order of the type
    checks matters: :class:`Binary` and :class:`Code` are tested before the
    plain bytes / str checks, and ``True`` / ``False`` before ``int``.

    Raises :class:`InvalidDocument` if `key` is not a :class:`str`, if
    `check_keys` is true and `key` starts with ``'$'`` or contains ``'.'``,
    or if `value` has an unsupported type. Raises :class:`OverflowError`
    for integers that do not fit in 64 bits.

    :Parameters:
      - `key`: the element name
      - `value`: the value to encode
      - `check_keys`: validate `key` (and nested keys) as described above
      - `uuid_subtype`: how :class:`uuid.UUID` values are stored
    """
    if not isinstance(key, str):
        # Wrap the key in a tuple so a tuple key is formatted as one value
        # instead of being expanded into the format arguments.
        raise InvalidDocument("documents must have only string keys, "
                              "key was %r" % (key,))
    if check_keys:
        if key.startswith("$"):
            raise InvalidDocument("key %r must not start with '$'" % key)
        if "." in key:
            raise InvalidDocument("key %r must not contain '.'" % key)

    name = _make_c_string(key, True)
    if isinstance(value, float):
        return BSONNUM + name + struct.pack("<d", value)

    if _use_uuid and isinstance(value, uuid.UUID):
        # Rewrap the UUID as a Binary; the Binary branch below emits it.
        if uuid_subtype == JAVA_LEGACY:
            # Java legacy: each 8-byte half is stored reversed.
            from_uuid = binary_type(value.bytes)
            as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
            value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
        elif uuid_subtype == CSHARP_LEGACY:
            # Microsoft GUID representation.
            value = Binary(binary_type(value.bytes_le),
                           subtype=OLD_UUID_SUBTYPE)
        else:
            value = Binary(binary_type(value.bytes), subtype=uuid_subtype)

    if isinstance(value, Binary):
        subtype = value.subtype
        if subtype == 2:
            # Subtype 2 nests a second length prefix inside the payload.
            value = struct.pack("<i", len(value)) + value
        return (BSONBIN + name +
                struct.pack("<i", len(value)) + b(chr(subtype)) + value)

    if isinstance(value, Code):
        cstring = _make_c_string(value)
        if not value.scope:
            length = struct.pack("<i", len(cstring))
            return BSONCOD + name + length + cstring
        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
        # 8 = the total-length int32 plus the string-length int32.
        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
        length = struct.pack("<i", len(cstring))
        return BSONCWS + name + full_length + length + cstring + scope

    if isinstance(value, binary_type):
        if PY3:
            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
            return (BSONBIN + name +
                    struct.pack("<i", len(value)) + ZERO + value)
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring

    if isinstance(value, str):
        cstring = _make_c_string(value)
        length = struct.pack("<i", len(cstring))
        return BSONSTR + name + length + cstring

    if isinstance(value, dict):
        return BSONOBJ + name + _dict_to_bson(value, check_keys,
                                              uuid_subtype, False)

    if isinstance(value, (list, tuple)):
        # Arrays are documents keyed by the decimal index of each item.
        as_dict = SON([(str(index), item)
                       for index, item in enumerate(value)])
        return BSONARR + name + _dict_to_bson(as_dict, check_keys,
                                              uuid_subtype, False)

    if isinstance(value, ObjectId):
        return BSONOID + name + value.binary

    # Identity checks so that only real booleans match, and they must come
    # before the int branch because bool is a subclass of int.
    if value is True:
        return BSONBOO + name + ONE
    if value is False:
        return BSONBOO + name + ZERO

    if isinstance(value, int):
        if value > MAX_INT64 or value < MIN_INT64:
            raise OverflowError("BSON can only handle up to 8-byte ints")
        if value > MAX_INT32 or value < MIN_INT32:
            return BSONLON + name + struct.pack("<q", value)
        return BSONINT + name + struct.pack("<i", value)

    if isinstance(value, datetime.datetime):
        if value.utcoffset() is not None:
            value = value - value.utcoffset()
        # Floor division keeps this in integers. True division followed by
        # int() truncated toward zero, which stored pre-epoch datetimes one
        # millisecond too late.
        millis = (calendar.timegm(value.timetuple()) * 1000 +
                  value.microsecond // 1000)
        return BSONDAT + name + struct.pack("<q", millis)

    if isinstance(value, Timestamp):
        time = struct.pack("<I", value.time)
        inc = struct.pack("<I", value.inc)
        # The increment is written before the time value.
        return BSONTIM + name + inc + time

    if value is None:
        return BSONNUL + name

    if isinstance(value, RE_TYPE):
        pattern = value.pattern
        flags = ""
        if value.flags & re.IGNORECASE:
            flags += "i"
        if value.flags & re.LOCALE:
            flags += "l"
        if value.flags & re.MULTILINE:
            flags += "m"
        if value.flags & re.DOTALL:
            flags += "s"
        if value.flags & re.UNICODE:
            flags += "u"
        if value.flags & re.VERBOSE:
            flags += "x"
        return BSONRGX + name + _make_c_string(pattern, True) + \
            _make_c_string(flags)

    if isinstance(value, DBRef):
        # Encoded as its document form; its '$'-prefixed keys are why key
        # checking is switched off here.
        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)

    if isinstance(value, MinKey):
        return BSONMIN + name
    if isinstance(value, MaxKey):
        return BSONMAX + name

    raise InvalidDocument("cannot convert value of type %s to bson" %
                          type(value))
def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
    """Encode a mapping as a complete BSON document.

    For a top-level document the ``"_id"`` entry, when present, is written
    first and without key checking; all other entries follow in the
    mapping's iteration order.

    Raises :class:`TypeError` if `dict` does not behave like a mapping
    (any :class:`AttributeError` raised while encoding is reported this
    way).

    :Parameters:
      - `dict`: the mapping to encode
      - `check_keys`: passed on to the element encoder
      - `uuid_subtype`: passed on to the element encoder
      - `top_level` (optional): whether this is the outermost document

    Returns the encoded bytes: int32 total size, the elements, and a NUL.
    """
    try:
        elements = []
        if top_level and "_id" in dict:
            elements.append(_element_to_bson("_id", dict["_id"],
                                             False, uuid_subtype))
        for (key, value) in dict.items():
            if not top_level or key != "_id":
                elements.append(_element_to_bson(key, value,
                                                 check_keys, uuid_subtype))
    except AttributeError:
        # Wrap the value in a tuple so a tuple argument is shown as one
        # value instead of being expanded into the format arguments.
        raise TypeError("encoder expected a mapping type but got: %r"
                        % (dict,))
    encoded = EMPTY.join(elements)
    # 5 = the int32 size prefix plus the terminating NUL.
    length = len(encoded) + 5
    return struct.pack("<i", length) + encoded + ZERO
# When the C extension was imported, its encoder replaces the pure-Python
# _dict_to_bson for every later lookup of that name.
if _use_c:
    _dict_to_bson = _cbson._dict_to_bson
def decode_all(data, as_class=dict,
               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
    """Decode BSON data to multiple documents.

    `data` must be a byte string of concatenated, valid, BSON-encoded
    documents.

    Raises :class:`~bson.errors.InvalidBSON` if a document declares a size
    below the five-byte minimum, extends past the end of `data`, or is not
    terminated by a NUL byte.

    :Parameters:
      - `data`: BSON data
      - `as_class` (optional): the class to use for the resulting
        documents
      - `tz_aware` (optional): if ``True``, return timezone-aware
        :class:`~datetime.datetime` instances
      - `uuid_subtype` (optional): passed on to the element decoders

    .. versionadded:: 1.9
    """
    docs = []
    position = 0
    end = len(data) - 1
    while position < end:
        obj_size = struct.unpack("<i", data[position:position + 4])[0]
        # The smallest document is 5 bytes (int32 size + NUL). A size of 0
        # following a valid document used to pass the checks below without
        # advancing `position`, so the loop never ended and `docs` grew
        # without bound.
        if obj_size < 5:
            raise InvalidBSON("invalid object size")
        if len(data) - position < obj_size:
            raise InvalidBSON("objsize too large")
        if data[position + obj_size - 1:position + obj_size] != ZERO:
            raise InvalidBSON("bad eoo")
        elements = data[position + 4:position + obj_size - 1]
        position += obj_size
        docs.append(_elements_to_dict(elements, as_class,
                                      tz_aware, uuid_subtype))
    return docs
# When the C extension was imported, its decode_all replaces the
# pure-Python implementation for every later lookup of that name.
if _use_c:
    decode_all = _cbson.decode_all
def is_valid(bson):
    """Check that the given byte string represents valid :class:`BSON` data.

    Raises :class:`TypeError` if `bson` is not an instance of
    :class:`str` (:class:`bytes` in python 3). Returns ``True``
    if `bson` decodes as exactly one :class:`BSON` document, ``False``
    otherwise.

    :Parameters:
      - `bson`: the data to be validated
    """
    if not isinstance(bson, binary_type):
        raise TypeError("BSON data must be an instance "
                        "of a subclass of %s" % (binary_type.__name__,))
    try:
        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
    except Exception:
        # Any decoding failure means "not valid". Catching Exception rather
        # than using a bare except lets KeyboardInterrupt and SystemExit
        # propagate instead of being reported as invalid data.
        return False
    return remainder == EMPTY
class BSON(binary_type):
    """A byte string holding one encoded BSON (Binary JSON) document.
    """

    @classmethod
    def encode(cls, document, check_keys=False, uuid_subtype=OLD_UUID_SUBTYPE):
        """Build a new :class:`BSON` instance from a document.

        `document` can be any mapping type (like :class:`dict`).

        Raises :class:`TypeError` if `document` is not a mapping type.
        Raises :class:`~bson.errors.InvalidDocument` if `document` cannot
        be converted to :class:`BSON`; the pure-Python encoder in this
        module also reports non-string keys this way.

        :Parameters:
          - `document`: mapping type representing a document
          - `check_keys` (optional): check if keys start with '$' or
            contain '.', raising :class:`~bson.errors.InvalidDocument` in
            either case
          - `uuid_subtype` (optional): how :class:`uuid.UUID` values are
            stored

        .. versionadded:: 1.9
        """
        encoded = _dict_to_bson(document, check_keys, uuid_subtype)
        return cls(encoded)

    def decode(self, as_class=dict,
               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
        """Turn this BSON data back into a document.

        The document is built as a :class:`dict` unless another class is
        given through `as_class`; any class that supports
        :meth:`__setitem__` can be used.

        With `tz_aware` set to ``True`` (recommended), every
        :class:`~datetime.datetime` in the result is timezone-aware with
        its timezone set to :attr:`bson.tz_util.utc`. With the default of
        ``False`` they are naive but still hold UTC values.

        :Parameters:
          - `as_class` (optional): the class to use for the resulting
            document
          - `tz_aware` (optional): if ``True``, return timezone-aware
            :class:`~datetime.datetime` instances
          - `uuid_subtype` (optional): passed on to the decoder

        .. versionadded:: 1.9
        """
        document, _remainder = _bson_to_dict(self, as_class,
                                             tz_aware, uuid_subtype)
        return document
def has_c():
    """Report whether the C extension was imported successfully.

    .. versionadded:: 1.9
    """
    return _use_c
def has_uuid():
    """Report whether the uuid module was imported successfully.

    .. versionadded:: 2.3
    """
    return _use_uuid

BIN
asyncio_mongo/_bson/_cbson.so Executable file

Binary file not shown.

View File

@@ -0,0 +1,229 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from uuid import UUID
except ImportError:
# Python2.4 doesn't have a uuid module.
pass
from asyncio_mongo._bson.py3compat import PY3, binary_type
# NOTE: this string follows the imports, so it is an ordinary expression
# statement and not the module's ``__doc__``.
"""Tools for representing BSON binary data.
"""
# Each constant below is followed by a bare string that describes it.
BINARY_SUBTYPE = 0
"""BSON binary subtype for binary data.
This is the default subtype for binary data.
.. versionadded:: 1.5
"""
FUNCTION_SUBTYPE = 1
"""BSON binary subtype for functions.
.. versionadded:: 1.5
"""
OLD_BINARY_SUBTYPE = 2
"""Old BSON binary subtype for binary data.
This is the old default subtype, the current
default is :data:`BINARY_SUBTYPE`.
.. versionadded:: 1.7
"""
OLD_UUID_SUBTYPE = 3
"""Old BSON binary subtype for a UUID.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using this subtype.
.. versionadded:: 2.1
"""
UUID_SUBTYPE = 4
"""BSON binary subtype for a UUID.
This is the new BSON binary subtype for UUIDs. The
current default is :data:`OLD_UUID_SUBTYPE` but will
change to this in a future release.
.. versionchanged:: 2.1
Changed to subtype 4.
.. versionadded:: 1.5
"""
# JAVA_LEGACY and CSHARP_LEGACY select a legacy byte order for UUIDs, which
# are then stored with OLD_UUID_SUBTYPE. Note that JAVA_LEGACY has the same
# numeric value (5) as MD5_SUBTYPE further down.
JAVA_LEGACY = 5
"""Used with :attr:`pymongo.collection.Collection.uuid_subtype`
to specify that UUIDs should be stored in the legacy byte order
used by the Java driver.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using :data:`OLD_UUID_SUBTYPE`.
.. versionadded:: 2.3
"""
CSHARP_LEGACY = 6
"""Used with :attr:`pymongo.collection.Collection.uuid_subtype`
to specify that UUIDs should be stored in the legacy byte order
used by the C# driver.
:class:`uuid.UUID` instances will automatically be encoded
by :mod:`bson` using :data:`OLD_UUID_SUBTYPE`.
.. versionadded:: 2.3
"""
# Every value accepted as a UUID representation selector.
ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY)
MD5_SUBTYPE = 5
"""BSON binary subtype for an MD5 hash.
.. versionadded:: 1.5
"""
USER_DEFINED_SUBTYPE = 128
"""BSON binary subtype for any user defined structure.
.. versionadded:: 1.5
"""
class Binary(binary_type):
    """Representation of BSON binary data.

    This is necessary because we want to represent Python strings as
    the BSON string type. We need to wrap binary data so we can tell
    the difference between what should be considered binary data and
    what should be considered a string when we encode to BSON.

    Raises TypeError if `data` is not an instance of :class:`str`
    (:class:`bytes` in python 3) or `subtype` is not an instance of
    :class:`int`. Raises ValueError if `subtype` is not in [0, 256).

    Two instances are equal only when both the bytes and the subtype match,
    and a :class:`Binary` never compares equal to a plain byte string.
    Instances are hashable, consistently with that equality.

    .. note::
      In python 3 instances of Binary with subtype 0 will be decoded
      directly to :class:`bytes`.

    :Parameters:
      - `data`: the binary data to represent
      - `subtype` (optional): the `binary subtype
        <http://bsonspec.org/#/specification>`_
        to use
    """

    def __new__(cls, data, subtype=BINARY_SUBTYPE):
        if not isinstance(data, binary_type):
            raise TypeError("data must be an "
                            "instance of %s" % (binary_type.__name__,))
        if not isinstance(subtype, int):
            raise TypeError("subtype must be an instance of int")
        if subtype >= 256 or subtype < 0:
            raise ValueError("subtype must be contained in [0, 256)")
        self = binary_type.__new__(cls, data)
        self.__subtype = subtype
        return self

    @property
    def subtype(self):
        """Subtype of this binary data.
        """
        return self.__subtype

    def __getnewargs__(self):
        # Work around http://bugs.python.org/issue7382
        data = super(Binary, self).__getnewargs__()[0]
        if PY3 and not isinstance(data, binary_type):
            data = data.encode('latin-1')
        return data, self.__subtype

    def __eq__(self, other):
        if isinstance(other, Binary):
            return ((self.__subtype, binary_type(self)) ==
                    (other.subtype, binary_type(other)))
        # We don't return NotImplemented here because if we did then
        # Binary("foo") == "foo" would return True, since Binary is a
        # subclass of str...
        return False

    def __hash__(self):
        # Defining __eq__ without __hash__ makes a class unhashable on
        # Python 3. Combine the inherited bytes hash with the subtype so
        # that equal instances hash equally.
        return super(Binary, self).__hash__() ^ hash(self.__subtype)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return "Binary(%s, %s)" % (binary_type.__repr__(self), self.__subtype)
class UUIDLegacy(Binary):
    """Wrapper that stores a UUID as legacy BSON binary subtype 3.

    The wrapped :class:`~uuid.UUID` stays available through :attr:`uuid`
    while the binary payload is the UUID's ``bytes`` tagged with
    ``OLD_UUID_SUBTYPE``.

    .. doctest::

      >>> import uuid
      >>> from asyncio_mongo._bson.binary import Binary, UUIDLegacy, UUID_SUBTYPE
      >>> my_uuid = uuid.uuid4()
      >>> coll = db.test
      >>> coll.uuid_subtype = UUID_SUBTYPE
      >>> coll.insert({'uuid': Binary(my_uuid.bytes, 3)})
      ObjectId('...')
      >>> coll.find({'uuid': my_uuid}).count()
      0
      >>> coll.find({'uuid': UUIDLegacy(my_uuid)}).count()
      1
      >>> coll.find({'uuid': UUIDLegacy(my_uuid)})[0]['uuid']
      UUID('...')
      >>>
      >>> # Convert from subtype 3 to subtype 4
      >>> doc = coll.find_one({'uuid': UUIDLegacy(my_uuid)})
      >>> coll.save(doc)
      ObjectId('...')
      >>> coll.find({'uuid': UUIDLegacy(my_uuid)}).count()
      0
      >>> coll.find({'uuid': {'$in': [UUIDLegacy(my_uuid), my_uuid]}}).count()
      1
      >>> coll.find_one({'uuid': my_uuid})['uuid']
      UUID('...')

    Raises TypeError if `obj` is not an instance of :class:`~uuid.UUID`.

    :Parameters:
      - `obj`: An instance of :class:`~uuid.UUID`.
    """

    def __new__(cls, obj):
        if not isinstance(obj, UUID):
            raise TypeError("obj must be an instance of uuid.UUID")
        # obj.bytes is passed through binary_type so the payload always has
        # the type that Binary.__new__ checks for.
        payload = binary_type(obj.bytes)
        instance = Binary.__new__(cls, payload, OLD_UUID_SUBTYPE)
        instance.__uuid = obj
        return instance

    def __getnewargs__(self):
        # Rebuild from the wrapped UUID; this is what copy and deepcopy use.
        return (self.__uuid,)

    @property
    def uuid(self):
        """The :class:`~uuid.UUID` wrapped by this instance.
        """
        return self.__uuid

    def __repr__(self):
        return "UUIDLegacy('%s')" % self.__uuid

View File

@@ -0,0 +1,78 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing JavaScript code in BSON.
"""
class Code(str):
    """BSON's JavaScript code type.

    Raises :class:`TypeError` if `code` is not an instance of
    :class:`basestring` (:class:`str` in python 3) or `scope`
    is not ``None`` or an instance of :class:`dict`.

    Scope variables can be set by passing a dictionary as the `scope`
    argument or by using keyword arguments. If a variable is set as a
    keyword argument it will override any setting for that variable in
    the `scope` dictionary.

    When `code` is itself a :class:`Code`, its scope is copied and used as
    the starting point; the source instance is left unchanged.

    Instances compare equal only to other :class:`Code` instances with the
    same source and scope, and are not hashable.

    :Parameters:
      - `code`: string containing JavaScript code to be evaluated
      - `scope` (optional): dictionary representing the scope in which
        `code` should be evaluated - a mapping from identifiers (as
        strings) to values
      - `**kwargs` (optional): scope variables can also be passed as
        keyword arguments

    .. versionadded:: 1.9
       Ability to pass scope values using keyword arguments.
    """

    def __new__(cls, code, scope=None, **kwargs):
        if not isinstance(code, str):
            raise TypeError("code must be an "
                            "instance of %s" % (str.__name__,))
        self = str.__new__(cls, code)
        try:
            inherited = code.scope
        except AttributeError:
            inherited = {}
        # Copy instead of aliasing: the updates below used to write into
        # the scope dict of the Code instance passed in as `code`.
        self.__scope = dict(inherited)
        if scope is not None:
            if not isinstance(scope, dict):
                raise TypeError("scope must be an instance of dict")
            self.__scope.update(scope)
        self.__scope.update(kwargs)
        return self

    @property
    def scope(self):
        """Scope dictionary for this instance.
        """
        return self.__scope

    def __repr__(self):
        return "Code(%s, %r)" % (str.__repr__(self), self.__scope)

    def __eq__(self, other):
        if isinstance(other, Code):
            return (self.__scope, str(self)) == (other.__scope, str(other))
        return False

    # Equality depends on the mutable scope dict, so instances stay
    # unhashable. Python 3 already implies this when __eq__ is defined;
    # stating it makes the intent explicit.
    __hash__ = None

    def __ne__(self, other):
        return not self == other

View File

@@ -0,0 +1,144 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for manipulating DBRefs (references to MongoDB documents)."""
from copy import deepcopy
from asyncio_mongo._bson.son import SON
class DBRef(object):
    """A reference to a document stored in MongoDB.
    """

    def __init__(self, collection, id, database=None, _extra=None, **kwargs):
        """Initialize a new :class:`DBRef`.

        Raises :class:`TypeError` if `collection` or `database` is not
        an instance of :class:`basestring` (:class:`str` in python 3).
        `database` is optional and allows references to documents to work
        across databases. Any additional keyword arguments will create
        additional fields in the resultant embedded document.

        :Parameters:
          - `collection`: name of the collection the document is stored in
          - `id`: the value of the document's ``"_id"`` field
          - `database` (optional): name of the database to reference
          - `_extra` (optional): mapping of additional fields, merged over
            `**kwargs`
          - `**kwargs` (optional): additional keyword arguments will
            create additional, custom fields

        .. versionchanged:: 1.8
           Now takes keyword arguments to specify additional fields.
        .. versionadded:: 1.1.1
           The `database` parameter.
        .. mongodoc:: dbrefs
        """
        if not isinstance(collection, str):
            raise TypeError("collection must be an "
                            "instance of %s" % (str.__name__,))
        if database is not None and not isinstance(database, str):
            raise TypeError("database must be an "
                            "instance of %s" % (str.__name__,))
        self.__collection = collection
        self.__id = id
        self.__database = database
        # None replaces the former shared ``{}`` default argument.
        if _extra is not None:
            kwargs.update(_extra)
        self.__kwargs = kwargs

    @property
    def collection(self):
        """Get the name of this DBRef's collection as unicode.
        """
        return self.__collection

    @property
    def id(self):
        """Get this DBRef's _id.
        """
        return self.__id

    @property
    def database(self):
        """Get the name of this DBRef's database.

        Returns None if this DBRef doesn't specify a database.

        .. versionadded:: 1.1.1
        """
        return self.__database

    def __getattr__(self, key):
        # __getattr__ only runs when normal lookup fails. If the storage
        # for the extra fields is itself missing (an instance created
        # without __init__), looking it up here would call __getattr__
        # again without end, so fail with a plain AttributeError instead.
        if key == "_DBRef__kwargs":
            raise AttributeError(key)
        try:
            return self.__kwargs[key]
        except KeyError:
            raise AttributeError(key)

    # Have to provide __setstate__ to avoid
    # infinite recursion since we override
    # __getattr__.
    def __setstate__(self, state):
        self.__dict__.update(state)

    def as_doc(self):
        """Get the SON document representation of this DBRef.

        Generally not needed by application developers
        """
        doc = SON([("$ref", self.collection),
                   ("$id", self.id)])
        if self.database is not None:
            doc["$db"] = self.database
        doc.update(self.__kwargs)
        return doc

    def __repr__(self):
        extra = "".join([", %s=%r" % (k, v)
                         for k, v in self.__kwargs.items()])
        if self.database is None:
            return "DBRef(%r, %r%s)" % (self.collection, self.id, extra)
        return "DBRef(%r, %r, %r%s)" % (self.collection, self.id,
                                        self.database, extra)

    def __eq__(self, other):
        if isinstance(other, DBRef):
            us = (self.__database, self.__collection,
                  self.__id, self.__kwargs)
            them = (other.__database, other.__collection,
                    other.__id, other.__kwargs)
            return us == them
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        """Get a hash value for this :class:`DBRef`.

        .. versionadded:: 1.1
        """
        return hash((self.__collection, self.__id, self.__database,
                     tuple(sorted(self.__kwargs.items()))))

    def __deepcopy__(self, memo):
        """Support function for `copy.deepcopy()`.

        .. versionadded:: 1.10
        """
        return DBRef(deepcopy(self.__collection, memo),
                     deepcopy(self.__id, memo),
                     deepcopy(self.__database, memo),
                     deepcopy(self.__kwargs, memo))

View File

@@ -0,0 +1,40 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exceptions raised by the BSON package."""
class BSONError(Exception):
    """Common base class of every exception defined by the BSON package.
    """


class InvalidBSON(BSONError):
    """Raised when invalid data is used to create a BSON object.
    """


class InvalidStringData(BSONError):
    """Raised when a string being encoded contains non-UTF8 data.
    """


class InvalidDocument(BSONError):
    """Raised when an invalid document is used to create a BSON object.
    """


class InvalidId(BSONError):
    """Raised when invalid data is used to create an ObjectId.
    """

View File

@@ -0,0 +1,220 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for using Python's :mod:`json` module with BSON documents.
This module provides two helper methods `dumps` and `loads` that wrap the
native :mod:`json` methods and provide explicit BSON conversion to and from
json. This allows for specialized encoding and decoding of BSON documents
into `Mongo Extended JSON
<http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_'s *Strict*
mode. This lets you encode / decode BSON documents to JSON even when
they use special BSON types.
Example usage (serialization)::
.. doctest::
>>> from asyncio_mongo._bson import Binary, Code
>>> from asyncio_mongo._bson.json_util import dumps
>>> dumps([{'foo': [1, 2]},
... {'bar': {'hello': 'world'}},
... {'code': Code("function x() { return 1; }")},
... {'bin': Binary("\x00\x01\x02\x03\x04")}])
'[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "00", "$binary": "AAECAwQ="}}]'
Example usage (deserialization)::
.. doctest::
>>> from asyncio_mongo._bson.json_util import loads
>>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "00", "$binary": "AAECAwQ="}}]')
[{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('\x00\x01\x02\x03\x04', 0)}]
Alternatively, you can manually pass the `default` to :func:`json.dumps`.
It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
instances (as they are extended strings you can't provide custom defaults),
but it will be faster as there is less recursion.
.. versionchanged:: 2.3
Added dumps and loads helpers to automatically handle conversion to and
from json and supports :class:`~bson.binary.Binary` and
:class:`~bson.code.Code`
.. versionchanged:: 1.9
Handle :class:`uuid.UUID` instances, whenever possible.
.. versionchanged:: 1.8
Handle timezone aware datetime instances on encode, decode to
timezone aware datetime instances.
.. versionchanged:: 1.8
Added support for encoding/decoding :class:`~bson.max_key.MaxKey`
and :class:`~bson.min_key.MinKey`, and for encoding
:class:`~bson.timestamp.Timestamp`.
.. versionchanged:: 1.2
Added support for encoding/decoding datetimes and regular expressions.
"""
import base64
import calendar
import datetime
import re
json_lib = True
try:
import json
except ImportError:
try:
import simplejson as json
except ImportError:
json_lib = False
import asyncio_mongo._bson as bson
from asyncio_mongo._bson import EPOCH_AWARE, RE_TYPE
from asyncio_mongo._bson.binary import Binary
from asyncio_mongo._bson.code import Code
from asyncio_mongo._bson.dbref import DBRef
from asyncio_mongo._bson.max_key import MaxKey
from asyncio_mongo._bson.min_key import MinKey
from asyncio_mongo._bson.objectid import ObjectId
from asyncio_mongo._bson.timestamp import Timestamp
from asyncio_mongo._bson.py3compat import PY3, binary_type, string_types
# Maps each single-letter regular expression option, as it appears in the
# "$options" string of an extended-JSON regex, to the matching `re` flag.
_RE_OPT_TABLE = dict(
    i=re.I,
    l=re.L,
    m=re.M,
    s=re.S,
    u=re.U,
    x=re.X,
)
def dumps(obj, *args, **kwargs):
    """Helper function that wraps :class:`json.dumps`.

    The object is first converted recursively so that all BSON types,
    including :class:`~bson.binary.Binary` and :class:`~bson.code.Code`,
    become plain JSON-compatible values. Extra positional and keyword
    arguments are forwarded to :func:`json.dumps` unchanged.

    Raises :class:`Exception` when no json library could be imported.
    """
    if json_lib:
        converted = _json_convert(obj)
        return json.dumps(converted, *args, **kwargs)
    raise Exception("No json library available")
def loads(s, *args, **kwargs):
    """Helper function that wraps :class:`json.loads`.

    Automatically passes the object_hook for BSON type conversion; any
    ``object_hook`` supplied by the caller is replaced. All other
    arguments are forwarded to :func:`json.loads` unchanged.

    Raises :class:`Exception` when no json library could be imported.
    """
    if json_lib:
        kwargs['object_hook'] = object_hook
        return json.loads(s, *args, **kwargs)
    raise Exception("No json library available")
def _json_convert(obj):
    """Recursive helper method that converts BSON types so they can be
    converted into json.

    Mapping-like objects become plain dicts and other non-string iterables
    become lists, with every contained value converted in turn. Anything
    else is handed to :func:`default`; values it rejects with
    :class:`TypeError` are returned untouched.
    """
    # Either attribute marks the object as mapping-like.
    looks_like_mapping = hasattr(obj, 'iteritems') or hasattr(obj, 'items')
    if looks_like_mapping:
        return {key: _json_convert(value) for key, value in obj.items()}
    if hasattr(obj, '__iter__') and not isinstance(obj, string_types):
        return [_json_convert(item) for item in obj]
    try:
        return default(obj)
    except TypeError:
        return obj
def object_hook(dct):
    """Turn a decoded extended-JSON dictionary into the BSON value it encodes.

    Intended for use as the ``object_hook`` of :func:`json.loads`. The
    marker keys are checked in a fixed order (``$oid``, ``$ref``, ``$date``,
    ``$regex``, ``$minKey``, ``$maxKey``, ``$binary``, ``$code``, ``$uuid``)
    and the first one present decides the result.

    :Parameters:
      - `dct`: a dictionary produced by the JSON decoder. It is never
        modified.

    Returns the converted value, or `dct` itself when it carries none of
    the recognised markers.
    """
    if "$oid" in dct:
        return ObjectId(str(dct["$oid"]))
    if "$ref" in dct:
        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
    if "$date" in dct:
        # "$date" holds milliseconds since the epoch.
        secs = float(dct["$date"]) / 1000.0
        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
    if "$regex" in dct:
        flags = 0
        # PyMongo always adds $options but some other tools may not.
        for opt in dct.get("$options", ""):
            # Unknown option letters are ignored.
            flags |= _RE_OPT_TABLE.get(opt, 0)
        return re.compile(dct["$regex"], flags)
    if "$minKey" in dct:
        return MinKey()
    if "$maxKey" in dct:
        return MaxKey()
    if "$binary" in dct:
        # Normalise into a local so the caller's dictionary is left alone.
        type_hex = dct["$type"]
        if isinstance(type_hex, int):
            type_hex = "%02x" % type_hex
        subtype = int(type_hex, 16)
        if subtype >= 0xffffff80:  # Handle mongoexport values
            # Only the hex digits after the first six carry the subtype.
            subtype = int(type_hex[6:], 16)
        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
    if "$code" in dct:
        return Code(dct["$code"], dct.get("$scope"))
    if bson.has_uuid() and "$uuid" in dct:
        return bson.uuid.UUID(dct["$uuid"])
    return dct
def default(obj):
    """Convert a single BSON value into its extended-JSON representation.

    Suitable as the ``default`` argument of :func:`json.dumps`. The type
    checks run in a fixed order and the first match wins.

    Raises :class:`TypeError` when `obj` is of no supported type.
    """
    if isinstance(obj, ObjectId):
        return {"$oid": str(obj)}
    if isinstance(obj, DBRef):
        return _json_convert(obj.as_doc())
    if isinstance(obj, datetime.datetime):
        # TODO share this code w/ bson.py?
        offset = obj.utcoffset()
        if offset is not None:
            # Shift aware datetimes to UTC before taking the timestamp.
            obj = obj - offset
        whole_seconds_ms = calendar.timegm(obj.timetuple()) * 1000
        millis = int(whole_seconds_ms + obj.microsecond / 1000)
        return {"$date": millis}
    if isinstance(obj, RE_TYPE):
        # One option letter per flag set on the pattern, in this order.
        flag_letters = (
            (re.IGNORECASE, "i"),
            (re.LOCALE, "l"),
            (re.MULTILINE, "m"),
            (re.DOTALL, "s"),
            (re.UNICODE, "u"),
            (re.VERBOSE, "x"),
        )
        flags = "".join(letter for bit, letter in flag_letters
                        if obj.flags & bit)
        return {"$regex": obj.pattern,
                "$options": flags}
    if isinstance(obj, MinKey):
        return {"$minKey": 1}
    if isinstance(obj, MaxKey):
        return {"$maxKey": 1}
    if isinstance(obj, Timestamp):
        return {"t": obj.time, "i": obj.inc}
    if isinstance(obj, Code):
        return {'$code': "%s" % obj, '$scope': obj.scope}
    if isinstance(obj, Binary):
        encoded = base64.b64encode(obj).decode()
        return {'$binary': encoded,
                '$type': "%02x" % obj.subtype}
    if PY3 and isinstance(obj, binary_type):
        # Plain bytes are emitted as binary with subtype 0.
        encoded = base64.b64encode(obj).decode()
        return {'$binary': encoded,
                '$type': "00"}
    if bson.has_uuid() and isinstance(obj, bson.uuid.UUID):
        return {"$uuid": obj.hex}
    raise TypeError("%r is not JSON serializable" % obj)

View File

@@ -0,0 +1,32 @@
# Copyright 2010-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Representation for the MongoDB internal MaxKey type.
"""
class MaxKey(object):
    """MongoDB internal MaxKey type.

    The class carries no state: every instance compares equal to every
    other instance and they all share one hash value.
    """

    def __eq__(self, other):
        """Return ``True`` for any other :class:`MaxKey`."""
        if isinstance(other, MaxKey):
            return True
        return NotImplemented

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        return not self == other

    def __hash__(self):
        """Return the hash shared by all :class:`MaxKey` instances."""
        # Defining __eq__ alone makes a class unhashable on Python 3. All
        # instances are equal, so a constant is the only consistent hash;
        # 0x7F is the BSON element type byte for a max key.
        return 0x7F

    def __repr__(self):
        return "MaxKey()"

View File

@@ -0,0 +1,32 @@
# Copyright 2010-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Representation for the MongoDB internal MinKey type.
"""
class MinKey(object):
    """MongoDB internal MinKey type.

    The class carries no state: every instance compares equal to every
    other instance and they all share one hash value.
    """

    def __eq__(self, other):
        """Return ``True`` for any other :class:`MinKey`."""
        if isinstance(other, MinKey):
            return True
        return NotImplemented

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        return not self == other

    def __hash__(self):
        """Return the hash shared by all :class:`MinKey` instances."""
        # Defining __eq__ alone makes a class unhashable on Python 3. All
        # instances are equal, so a constant is the only consistent hash;
        # 0xFF is the BSON element type byte for a min key.
        return 0xFF

    def __repr__(self):
        return "MinKey()"

View File

@@ -0,0 +1,289 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for working with MongoDB `ObjectIds
<http://dochub.mongodb.org/core/objectids>`_.
"""
import binascii
import calendar
import datetime
try:
import hashlib
_md5func = hashlib.md5
except ImportError: # for Python < 2.5
import md5
_md5func = md5.new
import os
import random
import socket
import struct
import threading
import time
from asyncio_mongo._bson.errors import InvalidId
from asyncio_mongo._bson.py3compat import (PY3, b, binary_type, text_type,
bytes_from_hex, string_types)
from asyncio_mongo._bson.tz_util import utc
# Byte-string constants built with the py3compat ``b`` helper.
# EMPTY seeds the id that ObjectId assembles piece by piece; ZERO is the
# padding byte used by ObjectId.from_datetime.
EMPTY = b("")
ZERO = b("\x00")
def _machine_bytes():
    """Get the machine portion of an ObjectId.

    Returns the first three bytes of the MD5 digest of this host's name.
    """
    hostname = socket.gethostname()
    if PY3:
        # gethostname() returns a unicode string in python 3.x
        # while the hash requires a byte string.
        hostname = hostname.encode()
    # On python 2.x the name is hashed as-is: calling encode() there
    # would fail with non-ascii hostnames.
    digest = _md5func(hostname).digest()
    return digest[0:3]
class ObjectId(object):
    """A MongoDB ObjectId.

    The id is a 12-byte value made of a 4-byte timestamp, 3 machine
    bytes, 2 process id bytes and a 3-byte counter.
    """

    # Process-wide counter for the last three bytes of generated ids,
    # started at a random value and guarded by `_inc_lock`.
    _inc = random.randint(0, 0xFFFFFF)
    _inc_lock = threading.Lock()

    # Computed once, when the class is created.
    _machine_bytes = _machine_bytes()

    __slots__ = ('__id',)

    def __init__(self, oid=None):
        """Initialize a new ObjectId.

        If `oid` is ``None``, create a new (unique) ObjectId. If `oid`
        is an instance of (:class:`basestring` (:class:`str` or :class:`bytes`
        in python 3), :class:`ObjectId`) validate it and use that.  Otherwise,
        a :class:`TypeError` is raised. If `oid` is invalid,
        :class:`~bson.errors.InvalidId` is raised.

        :Parameters:
          - `oid` (optional): a valid ObjectId (12 byte binary or 24 character
            hex string)

        .. versionadded:: 1.2.1
           The `oid` parameter can be a ``unicode`` instance (that contains
           only hexadecimal digits).

        .. mongodoc:: objectids
        """
        if oid is None:
            self.__generate()
        else:
            self.__validate(oid)

    @classmethod
    def from_datetime(cls, generation_time):
        """Create a dummy ObjectId instance with a specific generation time.

        This method is useful for doing range queries on a field
        containing :class:`ObjectId` instances.

        .. warning::
           It is not safe to insert a document containing an ObjectId
           generated using this method. This method deliberately
           eliminates the uniqueness guarantee that ObjectIds
           generally provide. ObjectIds generated with this method
           should be used exclusively in queries.

        `generation_time` will be converted to UTC. Naive datetime
        instances will be treated as though they already contain UTC.

        An example using this helper to get documents where ``"_id"``
        was generated before January 1, 2010 would be:

        >>> gen_time = datetime.datetime(2010, 1, 1)
        >>> dummy_id = ObjectId.from_datetime(gen_time)
        >>> result = collection.find({"_id": {"$lt": dummy_id}})

        :Parameters:
          - `generation_time`: :class:`~datetime.datetime` to be used
            as the generation time for the resulting ObjectId.

        .. versionchanged:: 1.8
           Properly handle timezone aware values for
           `generation_time`.

        .. versionadded:: 1.6
        """
        if generation_time.utcoffset() is not None:
            generation_time = generation_time - generation_time.utcoffset()
        ts = calendar.timegm(generation_time.timetuple())
        # Timestamp followed by eight zero bytes in place of the
        # machine, pid and counter portions.
        oid = struct.pack(">i", int(ts)) + ZERO * 8
        return cls(oid)

    @classmethod
    def is_valid(cls, oid):
        """Checks if a `oid` string is valid or not.

        :Parameters:
          - `oid`: the object id to validate

        .. versionadded:: 2.3
        """
        try:
            ObjectId(oid)
            return True
        except (InvalidId, TypeError):
            return False

    def __generate(self):
        """Generate a new value for this ObjectId.
        """
        oid = EMPTY

        # 4 bytes current time
        oid += struct.pack(">i", int(time.time()))

        # 3 bytes machine
        oid += ObjectId._machine_bytes

        # 2 bytes pid
        oid += struct.pack(">H", os.getpid() % 0xFFFF)

        # 3 bytes inc; the context manager releases the lock even if
        # packing raises.
        with ObjectId._inc_lock:
            oid += struct.pack(">i", ObjectId._inc)[1:4]
            ObjectId._inc = (ObjectId._inc + 1) % 0xFFFFFF

        self.__id = oid

    def __validate(self, oid):
        """Validate and use the given id for this ObjectId.

        Raises TypeError if id is not an instance of
        (:class:`basestring` (:class:`str` or :class:`bytes`
        in python 3), ObjectId) and InvalidId if it is not a
        valid ObjectId.

        :Parameters:
          - `oid`: a valid ObjectId
        """
        if isinstance(oid, ObjectId):
            self.__id = oid.__id
        elif isinstance(oid, string_types):
            if len(oid) == 12:
                # Only a raw byte string may be used directly.
                if isinstance(oid, binary_type):
                    self.__id = oid
                else:
                    raise InvalidId("%s is not a valid ObjectId" % oid)
            elif len(oid) == 24:
                try:
                    decoded = bytes_from_hex(oid)
                except (TypeError, ValueError):
                    raise InvalidId("%s is not a valid ObjectId" % oid)
                # bytes.fromhex() skips whitespace, so 24 characters do
                # not guarantee 12 decoded bytes.
                if len(decoded) != 12:
                    raise InvalidId("%s is not a valid ObjectId" % oid)
                self.__id = decoded
            else:
                raise InvalidId("%s is not a valid ObjectId" % oid)
        else:
            raise TypeError("id must be an instance of (%s, %s, ObjectId), "
                            "not %s" % (binary_type.__name__,
                                        text_type.__name__, type(oid)))

    @property
    def binary(self):
        """12-byte binary representation of this ObjectId.
        """
        return self.__id

    @property
    def generation_time(self):
        """A :class:`datetime.datetime` instance representing the time of
        generation for this :class:`ObjectId`.

        The :class:`datetime.datetime` is timezone aware, and
        represents the generation time in UTC. It is precise to the
        second.

        .. versionchanged:: 1.8
           Now return an aware datetime instead of a naive one.

        .. versionadded:: 1.2
        """
        t = struct.unpack(">i", self.__id[0:4])[0]
        return datetime.datetime.fromtimestamp(t, utc)

    def __getstate__(self):
        """return value of object for pickling.
        needed explicitly because __slots__() defined.
        """
        return self.__id

    def __setstate__(self, value):
        """explicit state set from pickling
        """
        # Provide backwards compatibility with OIDs
        # pickled with pymongo-1.9 or older.
        if isinstance(value, dict):
            oid = value["_ObjectId__id"]
        else:
            oid = value
        # ObjectIds pickled in python 2.x used `str` for __id.
        # In python 3.x this has to be converted to `bytes`
        # by encoding latin-1.
        if PY3 and isinstance(oid, text_type):
            self.__id = oid.encode('latin-1')
        else:
            self.__id = oid

    def __str__(self):
        """Return the id as a 24 character hex string."""
        if PY3:
            # hexlify() returns bytes on python 3.
            return binascii.hexlify(self.__id).decode()
        return binascii.hexlify(self.__id)

    def __repr__(self):
        return "ObjectId('%s')" % (str(self),)

    # The comparisons below order ids by their 12-byte value and return
    # NotImplemented for anything that is not an ObjectId.

    def __eq__(self, other):
        if isinstance(other, ObjectId):
            return self.__id == other.__id
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, ObjectId):
            return self.__id != other.__id
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, ObjectId):
            return self.__id < other.__id
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, ObjectId):
            return self.__id <= other.__id
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, ObjectId):
            return self.__id > other.__id
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, ObjectId):
            return self.__id >= other.__id
        return NotImplemented

    def __hash__(self):
        """Get a hash value for this :class:`ObjectId`.

        .. versionadded:: 1.1
        """
        return hash(self.__id)

View File

@@ -0,0 +1,60 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Utility functions and definitions for python3 compatibility."""
import sys
PY3 = sys.version_info[0] == 3

if PY3:
    import codecs
    from io import BytesIO as StringIO

    binary_type = bytes
    text_type = str

    def b(s):
        """Return the latin-1 encoded bytes of the text `s`."""
        # BSON and socket operations deal in binary data, which in
        # python 3 means instances of `bytes`. Python 2.6 and 2.7 can
        # spell such literals with the b prefix (e.g. b'foo') but
        # python 2.4 and 2.5 cannot, hence this compat function.
        # In python 3.x b('foo') results in b'foo'.
        # See http://python3porting.com/problems.html#nicer-solutions
        encoded, _ = codecs.latin_1_encode(s)
        return encoded

    def bytes_from_hex(h):
        """Decode the hex string `h` into bytes."""
        return bytes.fromhex(h)

else:
    # The original try/except fallback imported this same module again,
    # so a single import is equivalent.
    from io import StringIO

    binary_type = str
    # 2to3 will convert this to "str". That's okay
    # since we won't ever get here under python3.
    text_type = str

    def b(s):
        """Return `s` unchanged."""
        # See comments above. In python 2.x b('foo') is just 'foo'.
        return s

    def bytes_from_hex(h):
        """Decode the hex string `h` into a byte string."""
        return h.decode('hex')

# Every type accepted where "a string" is expected.
string_types = (binary_type, text_type)

243
asyncio_mongo/_bson/son.py Normal file
View File

@@ -0,0 +1,243 @@
# Copyright 2009-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for creating and manipulating SON, the Serialized Ocument Notation.
Regular dictionaries can be used instead of SON objects, but not when the order
of keys is important. A SON object can be used just like a normal Python
dictionary."""
import copy
import re
# The type of compiled regular expression objects, captured from a live
# instance so that isinstance() checks can recognise compiled patterns.
# Not pretty, but seems to be as good as it gets...
# This is essentially the same as re._pattern_type
RE_TYPE = type(re.compile(""))
class SON(dict):
    """SON data.

    A subclass of dict that maintains ordering of keys and provides a
    few extra niceties for dealing with SON. SON objects can be
    converted to and from BSON.

    The mapping from Python types to BSON types is as follows:

    ===================================  =============  ===================
    Python Type                          BSON Type      Supported Direction
    ===================================  =============  ===================
    None                                 null           both
    bool                                 boolean        both
    int [#int]_                          int32 / int64  py -> bson
    long                                 int64          both
    float                                number (real)  both
    string                               string         py -> bson
    unicode                              string         both
    list                                 array          both
    dict / `SON`                         object         both
    datetime.datetime [#dt]_ [#dt2]_     date           both
    compiled re                          regex          both
    `bson.binary.Binary`                 binary         both
    `bson.objectid.ObjectId`             oid            both
    `bson.dbref.DBRef`                   dbref          both
    None                                 undefined      bson -> py
    unicode                              code           bson -> py
    `bson.code.Code`                     code           py -> bson
    unicode                              symbol         bson -> py
    bytes (Python 3) [#bytes]_           binary         both
    ===================================  =============  ===================

    Note that to save binary data it must be wrapped as an instance of
    `bson.binary.Binary`. Otherwise it will be saved as a BSON string
    and retrieved as unicode.

    .. [#int] A Python int will be saved as a BSON int32 or BSON int64 depending
       on its size. A BSON int32 will always decode to a Python int. In Python 2.x
       a BSON int64 will always decode to a Python long. In Python 3.x a BSON
       int64 will decode to a Python int since there is no longer a long type.
    .. [#dt] datetime.datetime instances will be rounded to the nearest
       millisecond when saved
    .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
       should always use UTC.
    .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
       subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
       it will be decoded to an instance of :class:`~bson.binary.Binary` with
       subtype 0.
    """

    def __init__(self, data=None, **kwargs):
        """Create a SON from `data` followed by any keyword items.

        :Parameters:
          - `data` (optional): a mapping or an iterable of
            ``(key, value)`` pairs; see :meth:`update`
        """
        self.__keys = []
        dict.__init__(self)
        self.update(data)
        self.update(kwargs)

    def __new__(cls, *args, **kwargs):
        # The key list is created here as well so that instances built
        # without running __init__ still have one.
        instance = super(SON, cls).__new__(cls, *args, **kwargs)
        instance.__keys = []
        return instance

    def __repr__(self):
        result = []
        for key in self.__keys:
            result.append("(%r, %r)" % (key, self[key]))
        return "SON([%s])" % ", ".join(result)

    def __setitem__(self, key, value):
        # Only keys that are new are appended; assigning to an existing
        # key keeps its original position.
        if key not in self:
            self.__keys.append(key)
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        self.__keys.remove(key)
        dict.__delitem__(self, key)

    def keys(self):
        """Return a new list of the keys in insertion order."""
        return list(self.__keys)

    def copy(self):
        """Return a shallow copy that preserves key order."""
        other = SON()
        other.update(self)
        return other

    # TODO this is all from UserDict.DictMixin. it could probably be made more
    # efficient.
    # second level definitions support higher levels
    def __iter__(self):
        # keys() hands back a fresh list, so the document may be modified
        # while it is being iterated.
        for k in self.keys():
            yield k

    def has_key(self, key):
        return key in self.keys()

    def __contains__(self, key):
        # Membership is answered from the ordered key list, not from the
        # underlying dict storage.
        return key in self.keys()

    # third level takes advantage of second level definitions
    def iteritems(self):
        for k in self:
            yield (k, self[k])

    def iterkeys(self):
        return self.__iter__()

    # fourth level uses definitions from lower levels
    def itervalues(self):
        for _, v in self.items():
            yield v

    def values(self):
        """Return a list of the values in key order."""
        return [v for _, v in self.items()]

    def items(self):
        """Return a list of ``(key, value)`` pairs in key order."""
        return [(key, self[key]) for key in self]

    def clear(self):
        for key in self.keys():
            del self[key]

    def setdefault(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            self[key] = default
        return default

    def pop(self, key, *args):
        """Remove `key` and return its value.

        A single optional extra argument is returned when `key` is
        missing; without it :class:`KeyError` is raised.
        """
        if len(args) > 1:
            raise TypeError("pop expected at most 2 arguments, got "
                            + repr(1 + len(args)))
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            raise
        del self[key]
        return value

    def popitem(self):
        """Remove and return the first ``(key, value)`` pair.

        Raises :class:`KeyError` when the document is empty.
        """
        try:
            k, v = next(iter(self.items()))
        except StopIteration:
            raise KeyError('container is empty')
        del self[k]
        return (k, v)

    def update(self, other=None, **kwargs):
        """Add the items of `other`, then any keyword items.

        :Parameters:
          - `other` (optional): an object with ``items()``, an object
            with ``keys()`` and item access, or an iterable of
            ``(key, value)`` pairs
        """
        # Make progressively weaker assumptions about "other"
        if other is None:
            pass
        elif hasattr(other, 'items'):  # items saves lookups
            for k, v in other.items():
                self[k] = v
        elif hasattr(other, 'keys'):
            for k in list(other.keys()):
                self[k] = other[k]
        else:
            for k, v in other:
                self[k] = v
        if kwargs:
            self.update(kwargs)

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def __eq__(self, other):
        """Comparison to another SON is order-sensitive while comparison to a
        regular dictionary is order-insensitive.
        """
        if isinstance(other, SON):
            return (len(self) == len(other) and
                    self.items() == other.items())
        return self.to_dict() == other

    def __ne__(self, other):
        return not self == other

    def __len__(self):
        return len(self.keys())

    def to_dict(self):
        """Convert a SON document to a normal Python dictionary instance.

        This is trickier than just *dict(...)* because it needs to be
        recursive. Nested dictionaries and lists are rebuilt rather than
        modified, so the document and the values it holds are left
        untouched.
        """

        def transform_value(value):
            if isinstance(value, list):
                return [transform_value(v) for v in value]
            if isinstance(value, dict):
                # Covers SON too, which is a dict subclass.
                return dict((k, transform_value(v))
                            for k, v in value.items())
            return value

        return transform_value(self)

    def __deepcopy__(self, memo):
        """Support function for `copy.deepcopy()`."""
        out = SON()
        val_id = id(self)
        if val_id in memo:
            return memo.get(val_id)
        # Registered before descending so that self-references resolve
        # to the copy being built.
        memo[val_id] = out
        for k, v in self.items():
            # Compiled patterns are shared with the copy, not duplicated.
            if not isinstance(v, RE_TYPE):
                v = copy.deepcopy(v, memo)
            out[k] = v
        return out

View File

@@ -0,0 +1,97 @@
# Copyright 2010-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools for representing MongoDB internal Timestamps.
"""
import calendar
import datetime
from asyncio_mongo._bson.tz_util import utc
# Exclusive upper bound (2**32) for both fields of a Timestamp.
UPPERBOUND = 4294967296


class Timestamp(object):
    """MongoDB internal timestamps used in the opLog.
    """

    def __init__(self, time, inc):
        """Create a new :class:`Timestamp`.

        This class is only for use with the MongoDB opLog. If you need
        to store a regular timestamp, please use a
        :class:`~datetime.datetime`.

        Raises :class:`TypeError` if `time` is not an instance of
        :class:`int` or :class:`~datetime.datetime`, or `inc` is not
        an instance of :class:`int`. Raises :class:`ValueError` if
        `time` or `inc` is not in [0, 2**32).

        :Parameters:
          - `time`: time in seconds since epoch UTC, or a naive UTC
            :class:`~datetime.datetime`, or an aware
            :class:`~datetime.datetime`
          - `inc`: the incrementing counter

        .. versionchanged:: 1.7
           `time` can now be a :class:`~datetime.datetime` instance.
        """
        if isinstance(time, datetime.datetime):
            # Aware datetimes are shifted to UTC before conversion.
            if time.utcoffset() is not None:
                time = time - time.utcoffset()
            time = int(calendar.timegm(time.timetuple()))
        if not isinstance(time, int):
            raise TypeError("time must be an instance of int")
        if not isinstance(inc, int):
            raise TypeError("inc must be an instance of int")
        if not 0 <= time < UPPERBOUND:
            raise ValueError("time must be contained in [0, 2**32)")
        if not 0 <= inc < UPPERBOUND:
            raise ValueError("inc must be contained in [0, 2**32)")

        self.__time = time
        self.__inc = inc

    @property
    def time(self):
        """Get the time portion of this :class:`Timestamp`.
        """
        return self.__time

    @property
    def inc(self):
        """Get the inc portion of this :class:`Timestamp`.
        """
        return self.__inc

    def __eq__(self, other):
        if isinstance(other, Timestamp):
            return (self.__time == other.time and self.__inc == other.inc)
        else:
            return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        """Get a hash value for this :class:`Timestamp`."""
        # Defining __eq__ alone makes a class unhashable on Python 3;
        # hash the same two fields that equality compares.
        return hash((self.__time, self.__inc))

    def __repr__(self):
        return "Timestamp(%s, %s)" % (self.__time, self.__inc)

    def as_datetime(self):
        """Return a :class:`~datetime.datetime` instance corresponding
        to the time portion of this :class:`Timestamp`.

        .. versionchanged:: 1.8
           The returned datetime is now timezone aware.
        """
        return datetime.datetime.fromtimestamp(self.__time, utc)

View File

@@ -0,0 +1,52 @@
# Copyright 2010-2012 10gen, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Timezone related utilities for BSON."""
from datetime import (timedelta,
tzinfo)
ZERO = timedelta(0)


class FixedOffset(tzinfo):
    """Fixed offset timezone, in minutes east from UTC.

    Implementation based from the Python `standard library documentation
    <http://docs.python.org/library/datetime.html#tzinfo-objects>`_.
    Defining __getinitargs__ enables pickling / copying.
    """

    def __init__(self, offset, name):
        """Create a timezone called `name`.

        `offset` is either a :class:`~datetime.timedelta` or a number of
        minutes.
        """
        if not isinstance(offset, timedelta):
            offset = timedelta(minutes=offset)
        self.__offset = offset
        self.__name = name

    def __getinitargs__(self):
        """Return the constructor arguments that recreate this instance."""
        return (self.__offset, self.__name)

    def utcoffset(self, dt):
        """Return the fixed offset; `dt` is ignored."""
        return self.__offset

    def tzname(self, dt):
        """Return the name given at construction; `dt` is ignored."""
        return self.__name

    def dst(self, dt):
        """Return a zero timedelta; `dt` is ignored."""
        return ZERO


utc = FixedOffset(0, "UTC")
"""Fixed offset timezone representing UTC."""