Initial commit

* Examples work * setup.py kinda updated * Fork of txmongo but with new pymongo embedded
2014-01-24 16:19:17 -07:00
commit ae154dbc72
60 changed files with 14825 additions and 0 deletions
--- a/asyncio_mongo/_bson/__init__.py
+++ b/asyncio_mongo/_bson/__init__.py
@@ -0,0 +1,616 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""BSON (Binary JSON) encoding and decoding.
+"""
+
+import calendar
+import datetime
+import re
+import struct
+import sys
+
+from asyncio_mongo._bson.binary import (Binary, OLD_UUID_SUBTYPE,
+                         JAVA_LEGACY, CSHARP_LEGACY)
+from asyncio_mongo._bson.code import Code
+from asyncio_mongo._bson.dbref import DBRef
+from asyncio_mongo._bson.errors import (InvalidBSON,
+                         InvalidDocument,
+                         InvalidStringData)
+from asyncio_mongo._bson.max_key import MaxKey
+from asyncio_mongo._bson.min_key import MinKey
+from asyncio_mongo._bson.objectid import ObjectId
+from asyncio_mongo._bson.py3compat import b, binary_type
+from asyncio_mongo._bson.son import SON, RE_TYPE
+from asyncio_mongo._bson.timestamp import Timestamp
+from asyncio_mongo._bson.tz_util import utc
+
+
+# Optional C accelerator. When it imports, its _bson_to_dict, _dict_to_bson
+# and decode_all replace the pure-Python versions defined later in this file.
+try:
+    from asyncio_mongo._bson import _cbson
+    _use_c = True
+except ImportError:
+    _use_c = False
+
+# uuid support is optional; the encoder and decoder only give UUID values
+# special treatment when _use_uuid is True.
+try:
+    import uuid
+    _use_uuid = True
+except ImportError:
+    _use_uuid = False
+
+# True when running on a Python 3.x interpreter.
+PY3 = sys.version_info[0] == 3
+
+
+# Inclusive bounds of signed 32-bit and 64-bit integers; the encoder uses
+# them to choose between the 32-bit and 64-bit BSON integer elements.
+MAX_INT32 = 2147483647
+MIN_INT32 = -2147483648
+MAX_INT64 = 9223372036854775807
+MIN_INT64 = -9223372036854775808
+
+# The Unix epoch as a UTC-aware and as a naive datetime; decoded BSON dates
+# are computed as offsets from one of these.
+EPOCH_AWARE = datetime.datetime.fromtimestamp(0, utc)
+EPOCH_NAIVE = datetime.datetime.utcfromtimestamp(0)
+
+# Create constants compatible with all versions of
+# python from 2.4 forward. In 2.x b("foo") is just
+# "foo". In 3.x it becomes b"foo".
+EMPTY = b("")
+ZERO  = b("\x00")
+ONE   = b("\x01")
+
+# One-byte element type tags; each element in an encoded document starts
+# with one of these, followed by the element name and the payload.
+BSONNUM = b("\x01") # Floating point
+BSONSTR = b("\x02") # UTF-8 string
+BSONOBJ = b("\x03") # Embedded document
+BSONARR = b("\x04") # Array
+BSONBIN = b("\x05") # Binary
+BSONUND = b("\x06") # Undefined
+BSONOID = b("\x07") # ObjectId
+BSONBOO = b("\x08") # Boolean
+BSONDAT = b("\x09") # UTC Datetime
+BSONNUL = b("\x0A") # Null
+BSONRGX = b("\x0B") # Regex
+BSONREF = b("\x0C") # DBRef
+BSONCOD = b("\x0D") # Javascript code
+BSONSYM = b("\x0E") # Symbol
+BSONCWS = b("\x0F") # Javascript code with scope
+BSONINT = b("\x10") # 32bit int
+BSONTIM = b("\x11") # Timestamp
+BSONLON = b("\x12") # 64bit int
+BSONMIN = b("\xFF") # Min key
+BSONMAX = b("\x7F") # Max key
+
+
+def _get_int(data, position, as_class=None,
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE, unsigned=False):
+    """Decode a little-endian 32-bit integer starting at `position`.
+
+    `as_class`, `tz_aware` and `uuid_subtype` are unused; they are accepted
+    so the function matches the common element-getter signature. Pass
+    ``unsigned=True`` to read the four bytes as an unsigned value.
+
+    Returns ``(value, new_position)``. Raises :class:`InvalidBSON` when
+    fewer than four bytes are available at `position`.
+    """
+    code = "<I" if unsigned else "<i"
+    raw = data[position:position + 4]
+    try:
+        (value,) = struct.unpack(code, raw)
+    except struct.error:
+        raise InvalidBSON()
+    return value, position + 4
+
+
+def _get_c_string(data, position, length=None):
+    """Decode a UTF-8 string starting at `position`.
+
+    When `length` is ``None`` the string runs up to the next NUL byte;
+    otherwise exactly `length` bytes are decoded. In both cases the
+    returned position is one byte past the end of the string, i.e. it
+    skips the terminator.
+
+    Returns ``(text, new_position)``. Raises :class:`InvalidBSON` if a
+    NUL byte has to be searched for and none is found.
+    """
+    if length is not None:
+        stop = position + length
+    else:
+        try:
+            stop = data.index(ZERO, position)
+        except ValueError:
+            raise InvalidBSON()
+    return data[position:stop].decode("utf-8"), stop + 1
+
+
+def _make_c_string(string, check_null=False):
+    """Return `string` as NUL-terminated UTF-8 bytes.
+
+    :class:`str` input is encoded as UTF-8. Any other input is treated as
+    already-encoded bytes: it is validated as UTF-8 and returned unchanged
+    apart from the appended terminator.
+
+    Raises :class:`InvalidDocument` if `check_null` is true and the input
+    contains a NUL, and :class:`InvalidStringData` if byte input is not
+    valid UTF-8.
+    """
+    null_message = ("BSON keys / regex patterns must not "
+                    "contain a NULL character")
+    if not isinstance(string, str):
+        if check_null and ZERO in string:
+            raise InvalidDocument(null_message)
+        try:
+            string.decode("utf-8")
+        except UnicodeError:
+            raise InvalidStringData("strings in documents must be valid "
+                                    "UTF-8: %r" % string)
+        return string + ZERO
+    if check_null and "\x00" in string:
+        raise InvalidDocument(null_message)
+    return string.encode("utf-8") + ZERO
+
+
+def _get_number(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a little-endian 64-bit float starting at `position`.
+
+    `as_class`, `tz_aware` and `uuid_subtype` are unused.
+
+    Returns ``(number, new_position)``.
+    """
+    stop = position + 8
+    (number,) = struct.unpack("<d", data[position:stop])
+    return number, stop
+
+
+def _get_string(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a length-prefixed, NUL-terminated UTF-8 string.
+
+    The element starts with a little-endian 32-bit length that counts the
+    string bytes plus the trailing NUL. `as_class`, `tz_aware` and
+    `uuid_subtype` are unused.
+
+    Returns ``(text, new_position)``. Raises :class:`InvalidBSON` if the
+    length is not positive, exceeds the remaining data, or the byte at
+    the end of the string is not NUL.
+    """
+    length = struct.unpack("<i", data[position:position + 4])[0]
+    # The length includes the terminator, so anything below 1 is corrupt.
+    # Without this check a zero/negative length would slip through the
+    # tests below and could move the read position backwards.
+    if length <= 0 or (len(data) - position - 4) < length:
+        raise InvalidBSON("invalid string length")
+    position += 4
+    if data[position + length - 1:position + length] != ZERO:
+        raise InvalidBSON("invalid end of string")
+    return _get_c_string(data, position, length - 1)
+
+
+def _get_object(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode an embedded document starting at `position`.
+
+    The document is decoded into an `as_class` instance. If it contains a
+    ``"$ref"`` key it is converted to a :class:`DBRef` built from its
+    ``"$ref"``, ``"$id"`` and ``"$db"`` entries, with the remaining
+    entries passed along as extra fields.
+
+    Returns ``(document_or_dbref, new_position)``. Raises
+    :class:`InvalidBSON` if the last byte of the document is not NUL.
+    """
+    (size,) = struct.unpack("<i", data[position:position + 4])
+    stop = position + size
+    if data[stop - 1:stop] != ZERO:
+        raise InvalidBSON("bad eoo")
+    document = _elements_to_dict(data[position + 4:stop - 1],
+                                 as_class, tz_aware, uuid_subtype)
+    if "$ref" not in document:
+        return document, stop
+    reference = DBRef(document.pop("$ref"), document.pop("$id", None),
+                      document.pop("$db", None), document)
+    return reference, stop
+
+
+def _get_array(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON array into a :class:`list`.
+
+    The array is first decoded as a document; its values are then
+    collected by looking up the keys ``"0"``, ``"1"``, ... in order and
+    stopping at the first key that is missing.
+
+    Returns ``(items, new_position)``.
+    """
+    obj, position = _get_object(data, position,
+                                as_class, tz_aware, uuid_subtype)
+    items = []
+    index = 0
+    try:
+        while True:
+            items.append(obj[str(index)])
+            index += 1
+    except KeyError:
+        # First missing index marks the end of the array.
+        pass
+    return items, position
+
+
+def _get_binary(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON binary element starting at `position`.
+
+    The result depends on the subtype byte:
+
+    - subtypes 3 and 4 become :class:`uuid.UUID` when the uuid module is
+      available, using the byte order selected by `uuid_subtype`
+      (``JAVA_LEGACY``, ``CSHARP_LEGACY`` or plain big-endian bytes);
+    - subtype 0 becomes plain bytes on Python 3;
+    - everything else becomes a :class:`Binary` carrying the subtype.
+
+    `as_class` and `tz_aware` are unused.
+
+    Returns ``(value, new_position)``. Raises :class:`InvalidBSON` if a
+    subtype 2 payload's inner length disagrees with the outer length.
+    """
+    length, position = _get_int(data, position)
+    subtype = ord(data[position:position + 1])
+    position += 1
+    if subtype == 2:
+        # Subtype 2 payloads repeat their length in a second 4-byte prefix.
+        inner_length, position = _get_int(data, position)
+        if inner_length != length - 4:
+            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
+        length = inner_length
+
+    stop = position + length
+    payload = data[position:stop]
+
+    if _use_uuid and subtype in (3, 4):
+        if uuid_subtype == JAVA_LEGACY:
+            # Java legacy: each 8-byte half is stored reversed.
+            value = uuid.UUID(bytes=payload[0:8][::-1] + payload[8:16][::-1])
+        elif uuid_subtype == CSHARP_LEGACY:
+            # C# legacy: little-endian GUID layout.
+            value = uuid.UUID(bytes_le=payload)
+        else:
+            value = uuid.UUID(bytes=payload)
+        return (value, stop)
+
+    # Python3 special case. Decode subtype 0 to 'bytes'.
+    if PY3 and subtype == 0:
+        return payload, stop
+    return Binary(payload, subtype), stop
+
+
+def _get_oid(data, position, as_class=None,
+             tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+    """Decode the 12 bytes at `position` into an :class:`ObjectId`.
+
+    `as_class`, `tz_aware` and `uuid_subtype` are unused.
+
+    Returns ``(object_id, new_position)``.
+    """
+    stop = position + 12
+    return ObjectId(data[position:stop]), stop
+
+
+def _get_boolean(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a one-byte boolean starting at `position`.
+
+    The value is ``True`` only when the byte equals ``ONE``; any other
+    byte decodes to ``False``. `as_class`, `tz_aware` and `uuid_subtype`
+    are unused.
+
+    Returns ``(flag, new_position)``.
+    """
+    stop = position + 1
+    return data[position:stop] == ONE, stop
+
+
+def _get_date(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON datetime (signed 64-bit milliseconds from the epoch).
+
+    With `tz_aware` true the result is offset from ``EPOCH_AWARE``,
+    otherwise from ``EPOCH_NAIVE``. `as_class` and `uuid_subtype` are
+    unused.
+
+    Returns ``(datetime, new_position)``.
+    """
+    millis = struct.unpack("<q", data[position:position + 8])[0]
+    # divmod floors, so the remainder is always in [0, 1000) even for
+    # dates before the epoch, and the arithmetic stays in integers rather
+    # than going through the float that "/" produces on Python 3.
+    seconds, diff = divmod(millis, 1000)
+    position += 8
+    epoch = EPOCH_AWARE if tz_aware else EPOCH_NAIVE
+    dt = epoch + datetime.timedelta(seconds=seconds)
+    return dt.replace(microsecond=diff * 1000), position
+
+
+def _get_code(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON string element and wrap it in :class:`Code`.
+
+    Returns ``(code, new_position)``.
+    """
+    source, position = _get_string(data, position,
+                                   as_class, tz_aware, uuid_subtype)
+    wrapped = Code(source)
+    return wrapped, position
+
+
+def _get_code_w_scope(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a "code with scope" element into a :class:`Code`.
+
+    The element is a 32-bit integer (read and discarded), followed by the
+    code string and then the scope document.
+
+    Returns ``(code, new_position)``.
+    """
+    _ignored, position = _get_int(data, position)
+    source, position = _get_string(data, position,
+                                   as_class, tz_aware, uuid_subtype)
+    scope, position = _get_object(data, position,
+                                  as_class, tz_aware, uuid_subtype)
+    wrapped = Code(source, scope)
+    return wrapped, position
+
+
+def _get_null(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode an element that has no payload.
+
+    Nothing is read from `data`; the position is returned unchanged.
+
+    Returns ``(None, position)``.
+    """
+    return (None, position)
+
+
+def _get_regex(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON regular expression into a compiled pattern.
+
+    The element is two consecutive NUL-terminated strings: the pattern
+    and its flag letters. Each recognised letter (``i``, ``l``, ``m``,
+    ``s``, ``u``, ``x``) turns on the matching :mod:`re` flag; other
+    letters are ignored.
+
+    Returns ``(compiled_pattern, new_position)``.
+    """
+    pattern, position = _get_c_string(data, position)
+    bson_flags, position = _get_c_string(data, position)
+    # NOTE(review): recent Python 3 releases reject re.LOCALE on str
+    # patterns, so an "l" flag may make re.compile raise there - confirm
+    # against the interpreter versions this package targets.
+    flag_table = (
+        ("i", re.IGNORECASE),
+        ("l", re.LOCALE),
+        ("m", re.MULTILINE),
+        ("s", re.DOTALL),
+        ("u", re.UNICODE),
+        ("x", re.VERBOSE),
+    )
+    flags = 0
+    for letter, flag in flag_table:
+        if letter in bson_flags:
+            flags |= flag
+    return re.compile(pattern, flags), position
+
+
+def _get_ref(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a ``BSONREF`` element into a :class:`DBRef`.
+
+    Four bytes are skipped (presumably the length prefix of the
+    collection string - verify against the BSON spec), then a
+    NUL-terminated collection name and a 12-byte ObjectId are read.
+
+    Returns ``(dbref, new_position)``.
+    """
+    collection, after_name = _get_c_string(data, position + 4)
+    oid, after_oid = _get_oid(data, after_name)
+    return DBRef(collection, oid), after_oid
+
+
+def _get_timestamp(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a BSON timestamp into a :class:`Timestamp`.
+
+    Two unsigned 32-bit integers are read: the increment first, then the
+    time value.
+
+    Returns ``(timestamp, new_position)``.
+    """
+    increment, position = _get_int(data, position, unsigned=True)
+    seconds, position = _get_int(data, position, unsigned=True)
+    stamp = Timestamp(seconds, increment)
+    return stamp, position
+
+
+def _get_long(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode a little-endian signed 64-bit integer at `position`.
+
+    `as_class`, `tz_aware` and `uuid_subtype` are unused.
+
+    Returns ``(value, new_position)``.
+    """
+    stop = position + 8
+    (raw,) = struct.unpack("<q", data[position:stop])
+    # The int() call is a leftover from the Python 2 long() cast.
+    return int(raw), stop
+
+
+# Dispatch table used by _element_to_dict: maps an element's one-byte type
+# tag to the function that decodes its payload. Every getter is called as
+# getter(data, position, as_class, tz_aware, uuid_subtype) and returns
+# (value, new_position).
+_element_getter = {
+    BSONNUM: _get_number,
+    BSONSTR: _get_string,
+    BSONOBJ: _get_object,
+    BSONARR: _get_array,
+    BSONBIN: _get_binary,
+    BSONUND: _get_null,  # undefined decodes to None
+    BSONOID: _get_oid,
+    BSONBOO: _get_boolean,
+    BSONDAT: _get_date,
+    BSONNUL: _get_null,
+    BSONRGX: _get_regex,
+    BSONREF: _get_ref,
+    BSONCOD: _get_code,  # code
+    BSONSYM: _get_string,  # symbol decodes to a plain string
+    BSONCWS: _get_code_w_scope,
+    BSONINT: _get_int,  # number_int
+    BSONTIM: _get_timestamp,
+    BSONLON: _get_long, # 64-bit integer
+    # Min/max key have no payload, so the position (w) is returned unchanged.
+    BSONMIN: lambda v, w, x, y, z: (MinKey(), w),
+    BSONMAX: lambda v, w, x, y, z: (MaxKey(), w)}
+
+
+def _element_to_dict(data, position, as_class, tz_aware, uuid_subtype):
+    """Decode the single element that starts at `position`.
+
+    An element is a one-byte type tag, a NUL-terminated name and a
+    payload decoded by the getter registered for the tag in
+    ``_element_getter``.
+
+    Returns ``(name, value, new_position)``.
+    """
+    type_tag = data[position:position + 1]
+    name, position = _get_c_string(data, position + 1)
+    getter = _element_getter[type_tag]
+    value, position = getter(data, position, as_class,
+                             tz_aware, uuid_subtype)
+    return name, value, position
+
+
+def _elements_to_dict(data, as_class, tz_aware, uuid_subtype):
+    """Decode a run of encoded elements into a new `as_class` instance.
+
+    `data` holds only the elements of a document (no length prefix, no
+    trailing NUL). Elements are decoded one after another and stored by
+    name with item assignment, so later duplicates overwrite earlier ones.
+    """
+    document = as_class()
+    offset = 0
+    limit = len(data) - 1
+    while offset < limit:
+        name, value, offset = _element_to_dict(data, offset, as_class,
+                                               tz_aware, uuid_subtype)
+        document[name] = value
+    return document
+
+def _bson_to_dict(data, as_class, tz_aware, uuid_subtype):
+    """Decode exactly one BSON document from `data`.
+
+    Returns ``(document, remainder)`` where `remainder` is whatever
+    follows the document in `data`. Because the declared size must equal
+    ``len(data)``, the remainder is always empty in this pure-Python
+    version.
+
+    Raises :class:`InvalidBSON` if the declared size does not match the
+    data length or the final byte is not NUL. This function is replaced
+    by the C implementation when the extension is available.
+    """
+    (obj_size,) = struct.unpack("<i", data[:4])
+    total = len(data)
+    if total < obj_size:
+        raise InvalidBSON("objsize too large")
+    if total != obj_size or data[obj_size - 1:obj_size] != ZERO:
+        raise InvalidBSON("bad eoo")
+    document = _elements_to_dict(data[4:obj_size - 1], as_class,
+                                 tz_aware, uuid_subtype)
+    return document, data[obj_size:]
+if _use_c:
+    _bson_to_dict = _cbson._bson_to_dict
+
+
+def _element_to_bson(key, value, check_keys, uuid_subtype):
+    """Encode one ``key: value`` pair as a BSON element.
+
+    The element is the type tag, the key as a NUL-terminated string and
+    the encoded payload. The type is chosen from the Python type of
+    `value`; the order of the checks matters (for example ``True`` and
+    ``False`` are tested before :class:`int`, and :class:`Binary` /
+    :class:`Code` before their ``bytes`` / ``str`` base classes).
+
+    :Parameters:
+      - `key`: element name; must be a :class:`str`
+      - `value`: the value to encode
+      - `check_keys`: if true, reject keys that start with ``$`` or
+        contain ``.``
+      - `uuid_subtype`: how :class:`uuid.UUID` values are stored
+        (``JAVA_LEGACY``, ``CSHARP_LEGACY``, or a binary subtype)
+
+    Raises :class:`InvalidDocument` for invalid keys or unsupported value
+    types and :class:`OverflowError` for integers outside 64 bits.
+    """
+    if not isinstance(key, str):
+        raise InvalidDocument("documents must have only string keys, "
+                              "key was %r" % key)
+
+    if check_keys:
+        if key.startswith("$"):
+            raise InvalidDocument("key %r must not start with '$'" % key)
+        if "." in key:
+            raise InvalidDocument("key %r must not contain '.'" % key)
+
+    name = _make_c_string(key, True)
+    if isinstance(value, float):
+        return BSONNUM + name + struct.pack("<d", value)
+
+    if _use_uuid:
+        if isinstance(value, uuid.UUID):
+            # Java Legacy
+            if uuid_subtype == JAVA_LEGACY:
+                # Python 3.0(.1) returns a bytearray instance for bytes (3.1
+                # and newer just return a bytes instance). Convert that to
+                # binary_type (here and below) for compatibility.
+                from_uuid = binary_type(value.bytes)
+                as_legacy_java = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
+                value = Binary(as_legacy_java, subtype=OLD_UUID_SUBTYPE)
+            # C# legacy
+            elif uuid_subtype == CSHARP_LEGACY:
+                # Microsoft GUID representation.
+                value = Binary(binary_type(value.bytes_le),
+                               subtype=OLD_UUID_SUBTYPE)
+            # Python
+            else:
+                value = Binary(binary_type(value.bytes), subtype=uuid_subtype)
+
+    if isinstance(value, Binary):
+        subtype = value.subtype
+        if subtype == 2:
+            # Subtype 2 repeats the payload length inside the payload.
+            value = struct.pack("<i", len(value)) + value
+        return (BSONBIN + name +
+                struct.pack("<i", len(value)) + b(chr(subtype)) + value)
+    if isinstance(value, Code):
+        cstring = _make_c_string(value)
+        if not value.scope:
+            length = struct.pack("<i", len(cstring))
+            return BSONCOD + name + length + cstring
+        scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
+        # Total length covers both 4-byte length fields, the code string
+        # and the scope document.
+        full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
+        length = struct.pack("<i", len(cstring))
+        return BSONCWS + name + full_length + length + cstring + scope
+    if isinstance(value, binary_type):
+        if PY3:
+            # Python3 special case. Store 'bytes' as BSON binary subtype 0.
+            return (BSONBIN + name +
+                    struct.pack("<i", len(value)) + ZERO + value)
+        cstring = _make_c_string(value)
+        length = struct.pack("<i", len(cstring))
+        return BSONSTR + name + length + cstring
+    if isinstance(value, str):
+        cstring = _make_c_string(value)
+        length = struct.pack("<i", len(cstring))
+        return BSONSTR + name + length + cstring
+    if isinstance(value, dict):
+        return BSONOBJ + name + _dict_to_bson(value, check_keys, uuid_subtype, False)
+    if isinstance(value, (list, tuple)):
+        # Arrays are documents keyed by "0", "1", ...
+        as_dict = SON(list(zip([str(i) for i in range(len(value))], value)))
+        return BSONARR + name + _dict_to_bson(as_dict, check_keys, uuid_subtype, False)
+    if isinstance(value, ObjectId):
+        return BSONOID + name + value.binary
+    # Booleans must be tested before int: bool is a subclass of int.
+    if value is True:
+        return BSONBOO + name + ONE
+    if value is False:
+        return BSONBOO + name + ZERO
+    if isinstance(value, int):
+        # TODO this is an ugly way to check for this...
+        if value > MAX_INT64 or value < MIN_INT64:
+            raise OverflowError("BSON can only handle up to 8-byte ints")
+        if value > MAX_INT32 or value < MIN_INT32:
+            return BSONLON + name + struct.pack("<q", value)
+        return BSONINT + name + struct.pack("<i", value)
+    if isinstance(value, datetime.datetime):
+        if value.utcoffset() is not None:
+            value = value - value.utcoffset()
+        # Floor division keeps this in integers. With "/" the sum becomes
+        # a float and int() truncates toward zero, which stores dates
+        # before the epoch one millisecond too late whenever they have a
+        # sub-millisecond part.
+        millis = int(calendar.timegm(value.timetuple()) * 1000 +
+                     value.microsecond // 1000)
+        return BSONDAT + name + struct.pack("<q", millis)
+    if isinstance(value, Timestamp):
+        time = struct.pack("<I", value.time)
+        inc = struct.pack("<I", value.inc)
+        return BSONTIM + name + inc + time
+    if value is None:
+        return BSONNUL + name
+    if isinstance(value, RE_TYPE):
+        pattern = value.pattern
+        flags = ""
+        if value.flags & re.IGNORECASE:
+            flags += "i"
+        if value.flags & re.LOCALE:
+            flags += "l"
+        if value.flags & re.MULTILINE:
+            flags += "m"
+        if value.flags & re.DOTALL:
+            flags += "s"
+        if value.flags & re.UNICODE:
+            flags += "u"
+        if value.flags & re.VERBOSE:
+            flags += "x"
+        return BSONRGX + name + _make_c_string(pattern, True) + \
+            _make_c_string(flags)
+    if isinstance(value, DBRef):
+        # Encoded as its document form; key checks are off because that
+        # document contains "$"-prefixed keys such as "$ref".
+        return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
+    if isinstance(value, MinKey):
+        return BSONMIN + name
+    if isinstance(value, MaxKey):
+        return BSONMAX + name
+
+    raise InvalidDocument("cannot convert value of type %s to bson" %
+                          type(value))
+
+
+def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
+    """Encode a mapping as a complete BSON document.
+
+    For a top-level document the ``"_id"`` entry, if present, is written
+    first and without key checking; all other entries follow in the
+    mapping's iteration order. The result is the 32-bit total length,
+    the encoded elements and a terminating NUL.
+
+    Raises :class:`TypeError` if an :class:`AttributeError` occurs while
+    encoding (for example because `dict` has no ``items`` method). This
+    function is replaced by the C implementation when the extension is
+    available.
+    """
+    try:
+        parts = []
+        if top_level and "_id" in dict:
+            parts.append(_element_to_bson("_id", dict["_id"],
+                                          False, uuid_subtype))
+        for key, value in dict.items():
+            if top_level and key == "_id":
+                # Already written first, above.
+                continue
+            parts.append(_element_to_bson(key, value,
+                                          check_keys, uuid_subtype))
+    except AttributeError:
+        raise TypeError("encoder expected a mapping type but got: %r" % dict)
+
+    body = EMPTY.join(parts)
+    # +5: four bytes for the length field itself and one for the NUL.
+    return struct.pack("<i", len(body) + 5) + body + ZERO
+if _use_c:
+    _dict_to_bson = _cbson._dict_to_bson
+
+
+
+def decode_all(data, as_class=dict,
+               tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
+    """Decode BSON data to multiple documents.
+
+    `data` must be a string of concatenated, valid, BSON-encoded
+    documents. Raises :class:`InvalidBSON` if a document's declared size
+    runs past the end of `data` or its final byte is not NUL.
+
+    :Parameters:
+      - `data`: BSON data
+      - `as_class` (optional): the class to use for the resulting
+        documents
+      - `tz_aware` (optional): if ``True``, return timezone-aware
+        :class:`~datetime.datetime` instances
+      - `uuid_subtype` (optional): how binary subtype 3 and 4 payloads
+        are turned into :class:`~uuid.UUID` instances
+
+    .. versionadded:: 1.9
+    """
+    documents = []
+    total = len(data)
+    offset = 0
+    while offset < total - 1:
+        (obj_size,) = struct.unpack("<i", data[offset:offset + 4])
+        if total - offset < obj_size:
+            raise InvalidBSON("objsize too large")
+        stop = offset + obj_size
+        if data[stop - 1:stop] != ZERO:
+            raise InvalidBSON("bad eoo")
+        elements = data[offset + 4:stop - 1]
+        offset = stop
+        documents.append(_elements_to_dict(elements, as_class,
+                                           tz_aware, uuid_subtype))
+    return documents
+if _use_c:
+    decode_all = _cbson.decode_all
+
+
+def is_valid(bson):
+    """Check that the given string represents valid :class:`BSON` data.
+
+    Raises :class:`TypeError` if `bson` is not an instance of
+    :class:`str` (:class:`bytes` in python 3). Returns ``True``
+    if `bson` decodes as exactly one document with nothing left over,
+    ``False`` if decoding fails for any reason.
+
+    :Parameters:
+      - `bson`: the data to be validated
+    """
+    if not isinstance(bson, binary_type):
+        raise TypeError("BSON data must be an instance "
+                        "of a subclass of %s" % (binary_type.__name__,))
+
+    try:
+        (_, remainder) = _bson_to_dict(bson, dict, True, OLD_UUID_SUBTYPE)
+        return remainder == EMPTY
+    except Exception:
+        # Any decoding error means "not valid". Exception (rather than a
+        # bare except) lets KeyboardInterrupt and SystemExit propagate.
+        return False
+
+
+class BSON(binary_type):
+    """A byte string holding one encoded BSON (Binary JSON) document.
+    """
+
+    @classmethod
+    def encode(cls, document, check_keys=False, uuid_subtype=OLD_UUID_SUBTYPE):
+        """Encode a document to a new :class:`BSON` instance.
+
+        A document can be any mapping type (like :class:`dict`).
+
+        With the pure-Python encoder in this module, a `document` that is
+        not a mapping raises :class:`TypeError`, and keys that are not
+        :class:`str` or values that cannot be converted raise
+        :class:`~bson.errors.InvalidDocument`.
+
+        :Parameters:
+          - `document`: mapping type representing a document
+          - `check_keys` (optional): check if keys start with '$' or
+            contain '.', raising :class:`~bson.errors.InvalidDocument` in
+            either case
+          - `uuid_subtype` (optional): how :class:`~uuid.UUID` values are
+            stored
+
+        .. versionadded:: 1.9
+        """
+        encoded = _dict_to_bson(document, check_keys, uuid_subtype)
+        return cls(encoded)
+
+    def decode(self, as_class=dict,
+               tz_aware=False, uuid_subtype=OLD_UUID_SUBTYPE):
+        """Decode this BSON data.
+
+        The result is built as an instance of `as_class` (:class:`dict`
+        by default); any class that supports :meth:`__setitem__` can be
+        used.
+
+        If `tz_aware` is ``True`` (recommended), any
+        :class:`~datetime.datetime` instances returned will be
+        timezone-aware, with their timezone set to
+        :attr:`bson.tz_util.utc`. Otherwise (default), all
+        :class:`~datetime.datetime` instances will be naive (but
+        contain UTC).
+
+        :Parameters:
+          - `as_class` (optional): the class to use for the resulting
+            document
+          - `tz_aware` (optional): if ``True``, return timezone-aware
+            :class:`~datetime.datetime` instances
+          - `uuid_subtype` (optional): how binary subtype 3 and 4
+            payloads are turned into :class:`~uuid.UUID` instances
+
+        .. versionadded:: 1.9
+        """
+        document, _remainder = _bson_to_dict(self, as_class,
+                                             tz_aware, uuid_subtype)
+        return document
+
+
+def has_c():
+    """Is the C extension installed?
+
+    Returns ``True`` if the ``_cbson`` extension module was imported
+    successfully when this module was loaded, ``False`` otherwise.
+
+    .. versionadded:: 1.9
+    """
+    return _use_c
+
+
+def has_uuid():
+    """Is the uuid module available?
+
+    Returns ``True`` if ``import uuid`` succeeded when this module was
+    loaded, ``False`` otherwise.
+
+    .. versionadded:: 2.3
+    """
+    return _use_uuid
--- a/asyncio_mongo/_bson/_cbson.so
+++ b/asyncio_mongo/_bson/_cbson.so
--- a/asyncio_mongo/_bson/binary.py
+++ b/asyncio_mongo/_bson/binary.py
@@ -0,0 +1,229 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+    from uuid import UUID
+except ImportError:
+    # Python2.4 doesn't have a uuid module.
+    pass
+
+from asyncio_mongo._bson.py3compat import PY3, binary_type
+
+"""Tools for representing BSON binary data.
+"""
+
+BINARY_SUBTYPE = 0
+"""BSON binary subtype for binary data.
+
+This is the default subtype for binary data.
+
+.. versionadded:: 1.5
+"""
+
+FUNCTION_SUBTYPE = 1
+"""BSON binary subtype for functions.
+
+.. versionadded:: 1.5
+"""
+
+OLD_BINARY_SUBTYPE = 2
+"""Old BSON binary subtype for binary data.
+
+This is the old default subtype, the current
+default is :data:`BINARY_SUBTYPE`.
+
+.. versionadded:: 1.7
+"""
+
+OLD_UUID_SUBTYPE = 3
+"""Old BSON binary subtype for a UUID.
+
+:class:`uuid.UUID` instances will automatically be encoded
+by :mod:`bson` using this subtype.
+
+.. versionadded:: 2.1
+"""
+
+UUID_SUBTYPE = 4
+"""BSON binary subtype for a UUID.
+
+This is the new BSON binary subtype for UUIDs. The
+current default is :data:`OLD_UUID_SUBTYPE` but will
+change to this in a future release.
+
+.. versionchanged:: 2.1
+   Changed to subtype 4.
+.. versionadded:: 1.5
+"""
+
+JAVA_LEGACY = 5
+"""Used with :attr:`pymongo.collection.Collection.uuid_subtype`
+to specify that UUIDs should be stored in the legacy byte order
+used by the Java driver.
+
+:class:`uuid.UUID` instances will automatically be encoded
+by :mod:`bson` using :data:`OLD_UUID_SUBTYPE`.
+
+.. versionadded:: 2.3
+"""
+
+CSHARP_LEGACY = 6
+"""Used with :attr:`pymongo.collection.Collection.uuid_subtype`
+to specify that UUIDs should be stored in the legacy byte order
+used by the C# driver.
+
+:class:`uuid.UUID` instances will automatically be encoded
+by :mod:`bson` using :data:`OLD_UUID_SUBTYPE`.
+
+.. versionadded:: 2.3
+"""
+
+# The four UUID-related constants defined above, grouped in one tuple.
+# Note that JAVA_LEGACY has the same numeric value (5) as MD5_SUBTYPE.
+ALL_UUID_SUBTYPES = (OLD_UUID_SUBTYPE, UUID_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY)
+
+MD5_SUBTYPE = 5
+"""BSON binary subtype for an MD5 hash.
+
+.. versionadded:: 1.5
+"""
+
+USER_DEFINED_SUBTYPE = 128
+"""BSON binary subtype for any user defined structure.
+
+.. versionadded:: 1.5
+"""
+
+
+class Binary(binary_type):
+    """Representation of BSON binary data.
+
+    This is necessary because we want to represent Python strings as
+    the BSON string type. We need to wrap binary data so we can tell
+    the difference between what should be considered binary data and
+    what should be considered a string when we encode to BSON.
+
+    Raises TypeError if `data` is not an instance of :class:`str`
+    (:class:`bytes` in python 3) or `subtype` is not an instance of
+    :class:`int`. Raises ValueError if `subtype` is not in [0, 256).
+
+    .. note::
+      In python 3 instances of Binary with subtype 0 will be decoded
+      directly to :class:`bytes`.
+
+    :Parameters:
+      - `data`: the binary data to represent
+      - `subtype` (optional): the `binary subtype
+        <http://bsonspec.org/#/specification>`_
+        to use
+    """
+
+    def __new__(cls, data, subtype=BINARY_SUBTYPE):
+        if not isinstance(data, binary_type):
+            raise TypeError("data must be an "
+                            "instance of %s" % (binary_type.__name__,))
+        if not isinstance(subtype, int):
+            raise TypeError("subtype must be an instance of int")
+        if subtype >= 256 or subtype < 0:
+            raise ValueError("subtype must be contained in [0, 256)")
+        self = binary_type.__new__(cls, data)
+        self.__subtype = subtype
+        return self
+
+    @property
+    def subtype(self):
+        """Subtype of this binary data.
+        """
+        return self.__subtype
+
+    def __getnewargs__(self):
+        """Return the ``(data, subtype)`` pair used to rebuild this object.
+        """
+        # Work around http://bugs.python.org/issue7382
+        data = super(Binary, self).__getnewargs__()[0]
+        if PY3 and not isinstance(data, binary_type):
+            data = data.encode('latin-1')
+        return data, self.__subtype
+
+    def __eq__(self, other):
+        """Equal only to another Binary with the same subtype and bytes.
+        """
+        if isinstance(other, Binary):
+            return ((self.__subtype, binary_type(self)) ==
+                    (other.subtype, binary_type(other)))
+        # We don't return NotImplemented here because if we did then
+        # Binary("foo") == "foo" would return True, since Binary is a
+        # subclass of str...
+        return False
+
+    def __hash__(self):
+        """Hash of the bytes combined with the subtype.
+        """
+        # Defining __eq__ makes Python 3 drop the inherited __hash__, which
+        # would leave Binary unusable in sets and as a dict key. Mixing in
+        # the subtype keeps the hash consistent with __eq__.
+        return binary_type.__hash__(self) ^ hash(self.__subtype)
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __repr__(self):
+        return "Binary(%s, %s)" % (binary_type.__repr__(self), self.__subtype)
+
+
+class UUIDLegacy(Binary):
+    """Wrapper that stores a :class:`~uuid.UUID` as :class:`Binary` data
+    with the legacy subtype ``OLD_UUID_SUBTYPE`` (3), keeping the
+    original UUID available through :attr:`uuid`.
+
+    .. doctest::
+
+      >>> import uuid
+      >>> from asyncio_mongo._bson.binary import Binary, UUIDLegacy, UUID_SUBTYPE
+      >>> my_uuid = uuid.uuid4()
+      >>> coll = db.test
+      >>> coll.uuid_subtype = UUID_SUBTYPE
+      >>> coll.insert({'uuid': Binary(my_uuid.bytes, 3)})
+      ObjectId('...')
+      >>> coll.find({'uuid': my_uuid}).count()
+      0
+      >>> coll.find({'uuid': UUIDLegacy(my_uuid)}).count()
+      1
+      >>> coll.find({'uuid': UUIDLegacy(my_uuid)})[0]['uuid']
+      UUID('...')
+      >>>
+      >>> # Convert from subtype 3 to subtype 4
+      >>> doc = coll.find_one({'uuid': UUIDLegacy(my_uuid)})
+      >>> coll.save(doc)
+      ObjectId('...')
+      >>> coll.find({'uuid': UUIDLegacy(my_uuid)}).count()
+      0
+      >>> coll.find({'uuid': {'$in': [UUIDLegacy(my_uuid), my_uuid]}}).count()
+      1
+      >>> coll.find_one({'uuid': my_uuid})['uuid']
+      UUID('...')
+
+    Raises TypeError if `obj` is not an instance of :class:`~uuid.UUID`.
+
+    :Parameters:
+      - `obj`: An instance of :class:`~uuid.UUID`.
+    """
+
+    def __new__(cls, obj):
+        if isinstance(obj, UUID):
+            # Python 3.0(.1) returns a bytearray instance for bytes (3.1
+            # and newer just return a bytes instance). Convert that to
+            # binary_type for compatibility.
+            raw = binary_type(obj.bytes)
+            self = Binary.__new__(cls, raw, OLD_UUID_SUBTYPE)
+            self.__uuid = obj
+            return self
+        raise TypeError("obj must be an instance of uuid.UUID")
+
+    def __getnewargs__(self):
+        # Rebuild from the wrapped UUID so copy and deepcopy work.
+        return (self.__uuid,)
+
+    @property
+    def uuid(self):
+        """The :class:`~uuid.UUID` this instance was created from.
+        """
+        return self.__uuid
+
+    def __repr__(self):
+        return "UUIDLegacy('%s')" % self.__uuid
--- a/asyncio_mongo/_bson/code.py
+++ b/asyncio_mongo/_bson/code.py
@@ -0,0 +1,78 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for representing JavaScript code in BSON.
+"""
+
+class Code(str):
+    """BSON's JavaScript code type.
+
+    Raises :class:`TypeError` if `code` is not an instance of
+    :class:`basestring` (:class:`str` in python 3) or `scope`
+    is not ``None`` or an instance of :class:`dict`.
+
+    Scope variables can be set by passing a dictionary as the `scope`
+    argument or by using keyword arguments. If a variable is set as a
+    keyword argument it will override any setting for that variable in
+    the `scope` dictionary.
+
+    If `code` is itself a :class:`Code`, the new instance starts from a
+    copy of its scope; the original instance is left unchanged.
+
+    :Parameters:
+      - `code`: string containing JavaScript code to be evaluated
+      - `scope` (optional): dictionary representing the scope in which
+        `code` should be evaluated - a mapping from identifiers (as
+        strings) to values
+      - `**kwargs` (optional): scope variables can also be passed as
+        keyword arguments
+
+    .. versionadded:: 1.9
+       Ability to pass scope values using keyword arguments.
+    """
+
+    def __new__(cls, code, scope=None, **kwargs):
+        if not isinstance(code, str):
+            raise TypeError("code must be an "
+                            "instance of %s" % (str.__name__,))
+
+        self = str.__new__(cls, code)
+
+        try:
+            inherited = code.scope
+        except AttributeError:
+            inherited = {}
+        # Copy rather than alias: the updates below would otherwise
+        # silently modify the scope of the Code object passed in.
+        self.__scope = dict(inherited)
+
+        if scope is not None:
+            if not isinstance(scope, dict):
+                raise TypeError("scope must be an instance of dict")
+            self.__scope.update(scope)
+
+        # Keyword arguments are applied last so they win over `scope`.
+        self.__scope.update(kwargs)
+
+        return self
+
+    @property
+    def scope(self):
+        """Scope dictionary for this instance.
+        """
+        return self.__scope
+
+    def __repr__(self):
+        return "Code(%s, %r)" % (str.__repr__(self), self.__scope)
+
+    def __eq__(self, other):
+        """Equal only to another Code with the same text and scope.
+        """
+        if isinstance(other, Code):
+            return (self.__scope, str(self)) == (other.__scope, str(other))
+        return False
+
+    # Equality depends on the mutable scope dict, so instances are
+    # deliberately unhashable. Python 3 already implies this when __eq__
+    # is defined without __hash__; spelling it out documents the intent.
+    __hash__ = None
+
+    def __ne__(self, other):
+        return not self == other
--- a/asyncio_mongo/_bson/dbref.py
+++ b/asyncio_mongo/_bson/dbref.py
@@ -0,0 +1,144 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for manipulating DBRefs (references to MongoDB documents)."""
+
+from copy import deepcopy
+
+from asyncio_mongo._bson.son import SON
+
+
+class DBRef(object):
+    """A reference to a document stored in MongoDB.
+
+    A reference holds a collection name, the referenced document's
+    ``_id``, an optional database name and any number of custom fields.
+    Custom fields can be read as attributes of the instance.
+    """
+
+    def __init__(self, collection, id, database=None, _extra=None, **kwargs):
+        """Initialize a new :class:`DBRef`.
+
+        Raises :class:`TypeError` if `collection` or `database` is not
+        an instance of :class:`basestring` (:class:`str` in python 3).
+        `database` is optional and allows references to documents to work
+        across databases. Any additional keyword arguments will create
+        additional fields in the resultant embedded document.
+
+        :Parameters:
+          - `collection`: name of the collection the document is stored in
+          - `id`: the value of the document's ``"_id"`` field
+          - `database` (optional): name of the database to reference
+          - `_extra` (optional): mapping of additional fields; its entries
+            override same-named keyword arguments
+          - `**kwargs` (optional): additional keyword arguments will
+            create additional, custom fields
+
+        .. versionchanged:: 1.8
+           Now takes keyword arguments to specify additional fields.
+        .. versionadded:: 1.1.1
+           The `database` parameter.
+
+        .. mongodoc:: dbrefs
+        """
+        if not isinstance(collection, str):
+            raise TypeError("collection must be an "
+                            "instance of %s" % (str.__name__,))
+        if database is not None and not isinstance(database, str):
+            raise TypeError("database must be an "
+                            "instance of %s" % (str.__name__,))
+
+        self.__collection = collection
+        self.__id = id
+        self.__database = database
+        # `None` stands in for "no extra fields" so the signature does
+        # not carry a shared mutable default.
+        if _extra is not None:
+            kwargs.update(_extra)
+        self.__kwargs = kwargs
+
+    @property
+    def collection(self):
+        """Get the name of this DBRef's collection as unicode.
+        """
+        return self.__collection
+
+    @property
+    def id(self):
+        """Get this DBRef's _id.
+        """
+        return self.__id
+
+    @property
+    def database(self):
+        """Get the name of this DBRef's database.
+
+        Returns None if this DBRef doesn't specify a database.
+
+        .. versionadded:: 1.1.1
+        """
+        return self.__database
+
+    def __getattr__(self, key):
+        """Expose custom fields as attributes.
+
+        Only called when normal attribute lookup fails. Raises
+        :class:`AttributeError` if `key` is not a custom field.
+        """
+        try:
+            return self.__kwargs[key]
+        except KeyError:
+            raise AttributeError(key)
+
+    # Have to provide __setstate__ to avoid
+    # infinite recursion since we override
+    # __getattr__.
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+
+    def as_doc(self):
+        """Get the SON document representation of this DBRef.
+
+        The document holds ``$ref`` and ``$id`` first, then ``$db`` when a
+        database is set, then the custom fields.
+
+        Generally not needed by application developers
+        """
+        doc = SON([("$ref", self.collection),
+                   ("$id", self.id)])
+        if self.database is not None:
+            doc["$db"] = self.database
+        doc.update(self.__kwargs)
+        return doc
+
+    def __repr__(self):
+        extra = "".join(", %s=%r" % (k, v)
+                        for k, v in self.__kwargs.items())
+        if self.database is None:
+            return "DBRef(%r, %r%s)" % (self.collection, self.id, extra)
+        return "DBRef(%r, %r, %r%s)" % (self.collection, self.id,
+                                        self.database, extra)
+
+    def __eq__(self, other):
+        """Compare database, collection, id and custom fields.
+
+        Returns ``NotImplemented`` for objects that are not DBRefs.
+        """
+        if isinstance(other, DBRef):
+            us = (self.__database, self.__collection,
+                  self.__id, self.__kwargs)
+            them = (other.__database, other.__collection,
+                    other.__id, other.__kwargs)
+            return us == them
+        return NotImplemented
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        """Get a hash value for this :class:`DBRef`.
+
+        Custom fields are sorted before hashing so the order in which
+        they were supplied does not affect the result.
+
+        .. versionadded:: 1.1
+        """
+        return hash((self.__collection, self.__id, self.__database,
+                     tuple(sorted(self.__kwargs.items()))))
+
+    def __deepcopy__(self, memo):
+        """Support function for `copy.deepcopy()`.
+
+        .. versionadded:: 1.10
+        """
+        # The copied custom fields travel through the `_extra` slot.
+        return DBRef(deepcopy(self.__collection, memo),
+                     deepcopy(self.__id, memo),
+                     deepcopy(self.__database, memo),
+                     deepcopy(self.__kwargs, memo))
--- a/asyncio_mongo/_bson/errors.py
+++ b/asyncio_mongo/_bson/errors.py
@@ -0,0 +1,40 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Exceptions raised by the BSON package."""
+
+
+class BSONError(Exception):
+    """Base class for all BSON exceptions.
+
+    Every other exception defined in this module derives from it.
+    """
+
+
+class InvalidBSON(BSONError):
+    """Raised when trying to create a BSON object from invalid data.
+
+    Subclass of :class:`BSONError`.
+    """
+
+
+class InvalidStringData(BSONError):
+    """Raised when trying to encode a string containing non-UTF8 data.
+
+    Subclass of :class:`BSONError`.
+    """
+
+
+class InvalidDocument(BSONError):
+    """Raised when trying to create a BSON object from an invalid document.
+
+    Subclass of :class:`BSONError`.
+    """
+
+
+class InvalidId(BSONError):
+    """Raised when trying to create an ObjectId from invalid data.
+
+    Subclass of :class:`BSONError`.
+    """
--- a/asyncio_mongo/_bson/json_util.py
+++ b/asyncio_mongo/_bson/json_util.py
@@ -0,0 +1,220 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for using Python's :mod:`json` module with BSON documents.
+
+This module provides two helper methods `dumps` and `loads` that wrap the
+native :mod:`json` methods and provide explicit BSON conversion to and from
+json.  This allows for specialized encoding and decoding of BSON documents
+into `Mongo Extended JSON
+<http://www.mongodb.org/display/DOCS/Mongo+Extended+JSON>`_'s *Strict*
+mode.  This lets you encode / decode BSON documents to JSON even when
+they use special BSON types.
+
+Example usage (serialization):
+
+.. doctest::
+
+   >>> from asyncio_mongo._bson import Binary, Code
+   >>> from asyncio_mongo._bson.json_util import dumps
+   >>> dumps([{'foo': [1, 2]},
+   ...        {'bar': {'hello': 'world'}},
+   ...        {'code': Code("function x() { return 1; }")},
+   ...        {'bin': Binary("\x00\x01\x02\x03\x04")}])
+   '[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "00", "$binary": "AAECAwQ="}}]'
+
+Example usage (deserialization):
+
+.. doctest::
+
+   >>> from asyncio_mongo._bson.json_util import loads
+   >>> loads('[{"foo": [1, 2]}, {"bar": {"hello": "world"}}, {"code": {"$scope": {}, "$code": "function x() { return 1; }"}}, {"bin": {"$type": "00", "$binary": "AAECAwQ="}}]')
+   [{u'foo': [1, 2]}, {u'bar': {u'hello': u'world'}}, {u'code': Code('function x() { return 1; }', {})}, {u'bin': Binary('\x00\x01\x02\x03\x04', 0)}]
+
+Alternatively, you can manually pass the `default` to :func:`json.dumps`.
+It won't handle :class:`~bson.binary.Binary` and :class:`~bson.code.Code`
+instances (as they are extended strings you can't provide custom defaults),
+but it will be faster as there is less recursion.
+
+.. versionchanged:: 2.3
+   Added dumps and loads helpers to automatically handle conversion to and
+   from json and supports :class:`~bson.binary.Binary` and
+   :class:`~bson.code.Code`
+
+.. versionchanged:: 1.9
+   Handle :class:`uuid.UUID` instances, whenever possible.
+
+.. versionchanged:: 1.8
+   Handle timezone aware datetime instances on encode, decode to
+   timezone aware datetime instances.
+
+.. versionchanged:: 1.8
+   Added support for encoding/decoding :class:`~bson.max_key.MaxKey`
+   and :class:`~bson.min_key.MinKey`, and for encoding
+   :class:`~bson.timestamp.Timestamp`.
+
+.. versionchanged:: 1.2
+   Added support for encoding/decoding datetimes and regular expressions.
+"""
+
+import base64
+import calendar
+import datetime
+import re
+
+json_lib = True
+try:
+    import json
+except ImportError:
+    try:
+        import simplejson as json
+    except ImportError:
+        json_lib = False
+
+import asyncio_mongo._bson as bson
+from asyncio_mongo._bson import EPOCH_AWARE, RE_TYPE
+from asyncio_mongo._bson.binary import Binary
+from asyncio_mongo._bson.code import Code
+from asyncio_mongo._bson.dbref import DBRef
+from asyncio_mongo._bson.max_key import MaxKey
+from asyncio_mongo._bson.min_key import MinKey
+from asyncio_mongo._bson.objectid import ObjectId
+from asyncio_mongo._bson.timestamp import Timestamp
+
+from asyncio_mongo._bson.py3compat import PY3, binary_type, string_types
+
+
+# Maps each single-letter option found in an extended-JSON ``$options``
+# string to the matching :mod:`re` flag. `object_hook` uses it when
+# rebuilding a compiled pattern; letters not listed here are ignored.
+_RE_OPT_TABLE = {
+    "i": re.I,
+    "l": re.L,
+    "m": re.M,
+    "s": re.S,
+    "u": re.U,
+    "x": re.X,
+}
+
+
+def dumps(obj, *args, **kwargs):
+    """Serialize `obj` to a JSON string via :func:`json.dumps`.
+
+    `obj` is first run through the recursive BSON conversion, so
+    :class:`~bson.binary.Binary` and :class:`~bson.code.Code` values are
+    handled as well. Extra positional and keyword arguments are passed
+    on to :func:`json.dumps` unchanged.
+
+    Raises :class:`Exception` if no json library could be imported.
+    """
+    if json_lib:
+        converted = _json_convert(obj)
+        return json.dumps(converted, *args, **kwargs)
+    raise Exception("No json library available")
+
+
+def loads(s, *args, **kwargs):
+    """Parse the JSON string `s` via :func:`json.loads`.
+
+    This module's `object_hook` is always installed for BSON type
+    conversion; an ``object_hook`` keyword supplied by the caller is
+    replaced. Other arguments are passed on unchanged.
+
+    Raises :class:`Exception` if no json library could be imported.
+    """
+    if json_lib:
+        kwargs['object_hook'] = object_hook
+        return json.loads(s, *args, **kwargs)
+    raise Exception("No json library available")
+
+
+def _json_convert(obj):
+    """Recursively rewrite `obj` so that :mod:`json` can serialize it.
+
+    Mapping-like objects become plain dicts and other non-string
+    iterables become lists, each with converted members. Anything else
+    is handed to `default`; values it rejects with :class:`TypeError`
+    are returned unchanged.
+    """
+    looks_like_mapping = (hasattr(obj, 'iteritems')
+                          or hasattr(obj, 'items'))  # PY3 support
+    if looks_like_mapping:
+        return {key: _json_convert(value) for key, value in obj.items()}
+
+    if hasattr(obj, '__iter__') and not isinstance(obj, string_types):
+        return [_json_convert(member) for member in obj]
+
+    try:
+        converted = default(obj)
+    except TypeError:
+        return obj
+    return converted
+
+
+def object_hook(dct):
+    """Turn a decoded JSON object into the BSON value it represents.
+
+    Intended as the ``object_hook`` of :func:`json.loads`. The marker
+    keys are checked in this order, and the first one present decides
+    the result: ``$oid``, ``$ref``, ``$date``, ``$regex``, ``$minKey``,
+    ``$maxKey``, ``$binary``, ``$code`` and, when uuid support is
+    available, ``$uuid``. A dict with none of them is returned as is.
+
+    `dct` itself is never modified.
+    """
+    if "$oid" in dct:
+        return ObjectId(str(dct["$oid"]))
+    if "$ref" in dct:
+        return DBRef(dct["$ref"], dct["$id"], dct.get("$db", None))
+    if "$date" in dct:
+        # The value is divided by 1000 to get seconds, i.e. it is
+        # treated as a millisecond offset from EPOCH_AWARE.
+        secs = float(dct["$date"]) / 1000.0
+        return EPOCH_AWARE + datetime.timedelta(seconds=secs)
+    if "$regex" in dct:
+        flags = 0
+        # PyMongo always adds $options but some other tools may not.
+        for opt in dct.get("$options", ""):
+            flags |= _RE_OPT_TABLE.get(opt, 0)
+        return re.compile(dct["$regex"], flags)
+    if "$minKey" in dct:
+        return MinKey()
+    if "$maxKey" in dct:
+        return MaxKey()
+    if "$binary" in dct:
+        # Normalize the subtype in a local variable rather than writing
+        # the hex string back into the caller's dict.
+        type_hex = dct["$type"]
+        if isinstance(type_hex, int):
+            type_hex = "%02x" % type_hex
+        subtype = int(type_hex, 16)
+        if subtype >= 0xffffff80:  # Handle mongoexport values
+            subtype = int(type_hex[6:], 16)
+        return Binary(base64.b64decode(dct["$binary"].encode()), subtype)
+    if "$code" in dct:
+        return Code(dct["$code"], dct.get("$scope"))
+    if bson.has_uuid() and "$uuid" in dct:
+        return bson.uuid.UUID(dct["$uuid"])
+    return dct
+
+
+def default(obj):
+    """Convert one BSON-specific value into a JSON-serializable form.
+
+    Usable as the ``default`` callback of :func:`json.dumps`. Raises
+    :class:`TypeError` for any value that is not one of the handled
+    types.
+    """
+    if isinstance(obj, ObjectId):
+        return {"$oid": str(obj)}
+    if isinstance(obj, DBRef):
+        return _json_convert(obj.as_doc())
+    if isinstance(obj, datetime.datetime):
+        # TODO share this code w/ bson.py?
+        offset = obj.utcoffset()
+        if offset is not None:
+            # Shift aware datetimes to UTC before taking the timestamp.
+            obj = obj - offset
+        whole_seconds = calendar.timegm(obj.timetuple())
+        millis = int(whole_seconds * 1000 + obj.microsecond / 1000)
+        return {"$date": millis}
+    if isinstance(obj, RE_TYPE):
+        # Letters are emitted in this fixed order.
+        flag_letters = (
+            (re.IGNORECASE, "i"),
+            (re.LOCALE, "l"),
+            (re.MULTILINE, "m"),
+            (re.DOTALL, "s"),
+            (re.UNICODE, "u"),
+            (re.VERBOSE, "x"),
+        )
+        flags = "".join([letter for flag, letter in flag_letters
+                         if obj.flags & flag])
+        return {"$regex": obj.pattern,
+                "$options": flags}
+    if isinstance(obj, MinKey):
+        return {"$minKey": 1}
+    if isinstance(obj, MaxKey):
+        return {"$maxKey": 1}
+    if isinstance(obj, Timestamp):
+        return {"t": obj.time, "i": obj.inc}
+    if isinstance(obj, Code):
+        return {'$code': "%s" % obj, '$scope': obj.scope}
+    if isinstance(obj, Binary):
+        encoded = base64.b64encode(obj).decode()
+        return {'$binary': encoded,
+                '$type': "%02x" % obj.subtype}
+    if PY3 and isinstance(obj, binary_type):
+        # Plain bytes are written with subtype "00".
+        encoded = base64.b64encode(obj).decode()
+        return {'$binary': encoded,
+                '$type': "00"}
+    if bson.has_uuid() and isinstance(obj, bson.uuid.UUID):
+        return {"$uuid": obj.hex}
+    raise TypeError("%r is not JSON serializable" % obj)
--- a/asyncio_mongo/_bson/max_key.py
+++ b/asyncio_mongo/_bson/max_key.py
@@ -0,0 +1,32 @@
+# Copyright 2010-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Representation for the MongoDB internal MaxKey type.
+"""
+
+
+class MaxKey(object):
+    """MongoDB internal MaxKey type.
+
+    Every instance compares equal to every other :class:`MaxKey`
+    instance, and all instances share one hash value.
+    """
+
+    def __eq__(self, other):
+        if isinstance(other, MaxKey):
+            return True
+        return NotImplemented
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # A class that defines __eq__ without __hash__ is unhashable on
+        # Python 3. Equal objects must hash equally, so use one constant
+        # value for every instance (subclass instances included).
+        return hash(MaxKey)
+
+    def __repr__(self):
+        return "MaxKey()"
--- a/asyncio_mongo/_bson/min_key.py
+++ b/asyncio_mongo/_bson/min_key.py
@@ -0,0 +1,32 @@
+# Copyright 2010-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Representation for the MongoDB internal MinKey type.
+"""
+
+
+class MinKey(object):
+    """MongoDB internal MinKey type.
+
+    Every instance compares equal to every other :class:`MinKey`
+    instance, and all instances share one hash value.
+    """
+
+    def __eq__(self, other):
+        if isinstance(other, MinKey):
+            return True
+        return NotImplemented
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # A class that defines __eq__ without __hash__ is unhashable on
+        # Python 3. Equal objects must hash equally, so use one constant
+        # value for every instance (subclass instances included).
+        return hash(MinKey)
+
+    def __repr__(self):
+        return "MinKey()"
--- a/asyncio_mongo/_bson/objectid.py
+++ b/asyncio_mongo/_bson/objectid.py
@@ -0,0 +1,289 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for working with MongoDB `ObjectIds
+<http://dochub.mongodb.org/core/objectids>`_.
+"""
+
+import binascii
+import calendar
+import datetime
+try:
+    import hashlib
+    _md5func = hashlib.md5
+except ImportError:  # for Python < 2.5
+    import md5
+    _md5func = md5.new
+import os
+import random
+import socket
+import struct
+import threading
+import time
+
+from asyncio_mongo._bson.errors import InvalidId
+from asyncio_mongo._bson.py3compat import (PY3, b, binary_type, text_type,
+                            bytes_from_hex, string_types)
+from asyncio_mongo._bson.tz_util import utc
+
+# Byte-string constants: EMPTY seeds the buffer assembled when a new
+# ObjectId is generated; ZERO pads the ids built by
+# ObjectId.from_datetime.
+EMPTY = b("")
+ZERO  = b("\x00")
+
+def _machine_bytes():
+    """Return the 3-byte machine portion of an ObjectId.
+
+    The value is the first three bytes of the MD5 digest of this
+    host's name.
+    """
+    hostname = socket.gethostname()
+    if PY3:
+        # gethostname() returns a unicode string in python 3.x
+        # while update() requires a byte string.
+        hostname = hostname.encode()
+    # On python 2.x the name is used as is: calling encode() there
+    # would fail with non-ascii hostnames.
+    digest_builder = _md5func()
+    digest_builder.update(hostname)
+    return digest_builder.digest()[0:3]
+
+
+class ObjectId(object):
+    """A MongoDB ObjectId.
+
+    Wraps a 12-byte value: 4 bytes of timestamp, 3 bytes derived from
+    the host name, 2 bytes of process id and a 3-byte counter.
+    """
+
+    # Process-wide counter for the last three bytes, guarded by a lock.
+    _inc = random.randint(0, 0xFFFFFF)
+    _inc_lock = threading.Lock()
+
+    _machine_bytes = _machine_bytes()
+
+    # One-element tuple; the attribute is stored under the mangled name
+    # ``_ObjectId__id``.
+    __slots__ = ('__id',)
+
+    def __init__(self, oid=None):
+        """Initialize a new ObjectId.
+
+        If `oid` is ``None``, create a new (unique) ObjectId. If `oid`
+        is an instance of (:class:`basestring` (:class:`str` or :class:`bytes`
+        in python 3), :class:`ObjectId`) validate it and use that.  Otherwise,
+        a :class:`TypeError` is raised. If `oid` is invalid,
+        :class:`~bson.errors.InvalidId` is raised.
+
+        :Parameters:
+          - `oid` (optional): a valid ObjectId (12 byte binary or 24 character
+            hex string)
+
+        .. versionadded:: 1.2.1
+           The `oid` parameter can be a ``unicode`` instance (that contains
+           only hexadecimal digits).
+
+        .. mongodoc:: objectids
+        """
+        if oid is None:
+            self.__generate()
+        else:
+            self.__validate(oid)
+
+    @classmethod
+    def from_datetime(cls, generation_time):
+        """Create a dummy ObjectId instance with a specific generation time.
+
+        This method is useful for doing range queries on a field
+        containing :class:`ObjectId` instances.
+
+        .. warning::
+           It is not safe to insert a document containing an ObjectId
+           generated using this method. This method deliberately
+           eliminates the uniqueness guarantee that ObjectIds
+           generally provide. ObjectIds generated with this method
+           should be used exclusively in queries.
+
+        `generation_time` will be converted to UTC. Naive datetime
+        instances will be treated as though they already contain UTC.
+
+        An example using this helper to get documents where ``"_id"``
+        was generated before January 1, 2010 would be:
+
+        >>> gen_time = datetime.datetime(2010, 1, 1)
+        >>> dummy_id = ObjectId.from_datetime(gen_time)
+        >>> result = collection.find({"_id": {"$lt": dummy_id}})
+
+        :Parameters:
+          - `generation_time`: :class:`~datetime.datetime` to be used
+            as the generation time for the resulting ObjectId.
+
+        .. versionchanged:: 1.8
+           Properly handle timezone aware values for
+           `generation_time`.
+
+        .. versionadded:: 1.6
+        """
+        if generation_time.utcoffset() is not None:
+            generation_time = generation_time - generation_time.utcoffset()
+        ts = calendar.timegm(generation_time.timetuple())
+        # 4 timestamp bytes followed by 8 zero bytes.
+        oid = struct.pack(">i", int(ts)) + ZERO * 8
+        return cls(oid)
+
+    @classmethod
+    def is_valid(cls, oid):
+        """Checks if a `oid` string is valid or not.
+
+        Returns ``False`` for ``None`` and other empty values.
+
+        :Parameters:
+          - `oid`: the object id to validate
+
+        .. versionadded:: 2.3
+        """
+        if not oid:
+            # ObjectId(None) would *generate* a fresh id instead of
+            # validating anything, so reject empty values up front.
+            return False
+        try:
+            ObjectId(oid)
+            return True
+        except (InvalidId, TypeError):
+            return False
+
+    def __generate(self):
+        """Generate a new value for this ObjectId.
+        """
+        oid = EMPTY
+
+        # 4 bytes current time
+        oid += struct.pack(">i", int(time.time()))
+
+        # 3 bytes machine
+        oid += ObjectId._machine_bytes
+
+        # 2 bytes pid
+        oid += struct.pack(">H", os.getpid() % 0xFFFF)
+
+        # 3 bytes inc; the lock is released even if packing fails
+        with ObjectId._inc_lock:
+            oid += struct.pack(">i", ObjectId._inc)[1:4]
+            ObjectId._inc = (ObjectId._inc + 1) % 0xFFFFFF
+
+        self.__id = oid
+
+    def __validate(self, oid):
+        """Validate and use the given id for this ObjectId.
+
+        Raises TypeError if id is not an instance of
+        (:class:`basestring` (:class:`str` or :class:`bytes`
+        in python 3), ObjectId) and InvalidId if it is not a
+        valid ObjectId.
+
+        :Parameters:
+          - `oid`: a valid ObjectId
+        """
+        if isinstance(oid, ObjectId):
+            self.__id = oid.__id
+        elif isinstance(oid, string_types):
+            if len(oid) == 12:
+                # Length 12 is only accepted as raw binary data.
+                if isinstance(oid, binary_type):
+                    self.__id = oid
+                else:
+                    raise InvalidId("%s is not a valid ObjectId" % oid)
+            elif len(oid) == 24:
+                # Length 24 must be a hex string.
+                try:
+                    self.__id = bytes_from_hex(oid)
+                except (TypeError, ValueError):
+                    raise InvalidId("%s is not a valid ObjectId" % oid)
+            else:
+                raise InvalidId("%s is not a valid ObjectId" % oid)
+        else:
+            raise TypeError("id must be an instance of (%s, %s, ObjectId), "
+                            "not %s" % (binary_type.__name__,
+                                        text_type.__name__, type(oid)))
+
+    @property
+    def binary(self):
+        """12-byte binary representation of this ObjectId.
+        """
+        return self.__id
+
+    @property
+    def generation_time(self):
+        """A :class:`datetime.datetime` instance representing the time of
+        generation for this :class:`ObjectId`.
+
+        The :class:`datetime.datetime` is timezone aware, and
+        represents the generation time in UTC. It is precise to the
+        second.
+
+        .. versionchanged:: 1.8
+           Now return an aware datetime instead of a naive one.
+
+        .. versionadded:: 1.2
+        """
+        t = struct.unpack(">i", self.__id[0:4])[0]
+        return datetime.datetime.fromtimestamp(t, utc)
+
+    def __getstate__(self):
+        """return value of object for pickling.
+        needed explicitly because __slots__() defined.
+        """
+        return self.__id
+
+    def __setstate__(self, value):
+        """explicit state set from pickling
+        """
+        # Provide backwards compatibility with OIDs
+        # pickled with pymongo-1.9 or older.
+        if isinstance(value, dict):
+            oid = value["_ObjectId__id"]
+        else:
+            oid = value
+        # ObjectIds pickled in python 2.x used `str` for __id.
+        # In python 3.x this has to be converted to `bytes`
+        # by encoding latin-1.
+        if PY3 and isinstance(oid, text_type):
+            self.__id = oid.encode('latin-1')
+        else:
+            self.__id = oid
+
+    def __str__(self):
+        """Return the id as a hex string."""
+        if PY3:
+            return binascii.hexlify(self.__id).decode()
+        return binascii.hexlify(self.__id)
+
+    def __repr__(self):
+        return "ObjectId('%s')" % (str(self),)
+
+    # The comparisons below work on the raw bytes and return
+    # NotImplemented for anything that is not an ObjectId.
+
+    def __eq__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id == other.__id
+        return NotImplemented
+
+    def __ne__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id != other.__id
+        return NotImplemented
+
+    def __lt__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id < other.__id
+        return NotImplemented
+
+    def __le__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id <= other.__id
+        return NotImplemented
+
+    def __gt__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id > other.__id
+        return NotImplemented
+
+    def __ge__(self, other):
+        if isinstance(other, ObjectId):
+            return self.__id >= other.__id
+        return NotImplemented
+
+    def __hash__(self):
+        """Get a hash value for this :class:`ObjectId`.
+
+        .. versionadded:: 1.1
+        """
+        return hash(self.__id)
--- a/asyncio_mongo/_bson/py3compat.py
+++ b/asyncio_mongo/_bson/py3compat.py
@@ -0,0 +1,60 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you
+# may not use this file except in compliance with the License.  You
+# may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.  See the License for the specific language governing
+# permissions and limitations under the License.
+
+"""Utility functions and definitions for python3 compatibility."""
+
+import sys
+
+# True when running under Python 3; selects the definitions below.
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    import codecs
+
+    # Under this name callers get a bytes buffer on python 3.
+    from io import BytesIO as StringIO
+
+    def b(s):
+        """Return the text `s` as bytes, encoded with latin-1."""
+        # BSON and socket operations deal in binary data. In
+        # python 3 that means instances of `bytes`. In python
+        # 2.6 and 2.7 you can create an alias for `bytes` using
+        # the b prefix (e.g. b'foo'). Python 2.4 and 2.5 don't
+        # provide this marker so we provide this compat function.
+        # In python 3.x b('foo') results in b'foo'.
+        # See http://python3porting.com/problems.html#nicer-solutions
+        return codecs.latin_1_encode(s)[0]
+
+    def bytes_from_hex(h):
+        """Decode the hex string `h` into bytes."""
+        return bytes.fromhex(h)
+
+    binary_type = bytes
+    text_type   = str
+
+else:
+    # The original try/except ImportError repeated this exact import in
+    # both arms, so the fallback could never succeed where the first
+    # attempt failed; a single import behaves the same.
+    from io import StringIO
+
+    def b(s):
+        """Return `s` unchanged."""
+        # See comments above. In python 2.x b('foo') is just 'foo'.
+        return s
+
+    def bytes_from_hex(h):
+        """Decode the hex string `h` into a byte string."""
+        return h.decode('hex')
+
+    binary_type = str
+    # 2to3 will convert this to "str". That's okay
+    # since we won't ever get here under python3.
+    text_type   = str
+
+# Tuple for isinstance checks that accept either binary or text strings.
+string_types = (binary_type, text_type)
--- a/asyncio_mongo/_bson/son.py
+++ b/asyncio_mongo/_bson/son.py
@@ -0,0 +1,243 @@
+# Copyright 2009-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for creating and manipulating SON, the Serialized Ocument Notation.
+
+Regular dictionaries can be used instead of SON objects, but not when the order
+of keys is important. A SON object can be used just like a normal Python
+dictionary."""
+
+import copy
+import re
+
+# This sort of sucks, but seems to be as good as it gets...
+# This is essentially the same as re._pattern_type
+RE_TYPE = type(re.compile(""))
+
+
+class SON(dict):
+    """SON data.
+
+    A subclass of dict that maintains ordering of keys and provides a
+    few extra niceties for dealing with SON. SON objects can be
+    converted to and from BSON.
+
+    The mapping from Python types to BSON types is as follows:
+
+    ===================================  =============  ===================
+    Python Type                          BSON Type      Supported Direction
+    ===================================  =============  ===================
+    None                                 null           both
+    bool                                 boolean        both
+    int [#int]_                          int32 / int64  py -> bson
+    long                                 int64          both
+    float                                number (real)  both
+    string                               string         py -> bson
+    unicode                              string         both
+    list                                 array          both
+    dict / `SON`                         object         both
+    datetime.datetime [#dt]_ [#dt2]_     date           both
+    compiled re                          regex          both
+    `bson.binary.Binary`                 binary         both
+    `bson.objectid.ObjectId`             oid            both
+    `bson.dbref.DBRef`                   dbref          both
+    None                                 undefined      bson -> py
+    unicode                              code           bson -> py
+    `bson.code.Code`                     code           py -> bson
+    unicode                              symbol         bson -> py
+    bytes (Python 3) [#bytes]_           binary         both
+    ===================================  =============  ===================
+
+    Note that to save binary data it must be wrapped as an instance of
+    `bson.binary.Binary`. Otherwise it will be saved as a BSON string
+    and retrieved as unicode.
+
+    .. [#int] A Python int will be saved as a BSON int32 or BSON int64 depending
+       on its size. A BSON int32 will always decode to a Python int. In Python 2.x
+       a BSON int64 will always decode to a Python long. In Python 3.x a BSON
+       int64 will decode to a Python int since there is no longer a long type.
+    .. [#dt] datetime.datetime instances will be rounded to the nearest
+       millisecond when saved
+    .. [#dt2] all datetime.datetime instances are treated as *naive*. clients
+       should always use UTC.
+    .. [#bytes] The bytes type from Python 3.x is encoded as BSON binary with
+       subtype 0. In Python 3.x it will be decoded back to bytes. In Python 2.x
+       it will be decoded to an instance of :class:`~bson.binary.Binary` with
+       subtype 0.
+    """
+
+    def __init__(self, data=None, **kwargs):
+        """Create a SON from `data` (mapping or pairs) plus keyword items."""
+        self.__keys = []
+        dict.__init__(self)
+        self.update(data)
+        self.update(kwargs)
+
+    def __new__(cls, *args, **kwargs):
+        # Guarantee the key list exists even when __init__ is never run.
+        instance = super(SON, cls).__new__(cls, *args, **kwargs)
+        instance.__keys = []
+        return instance
+
+    def __repr__(self):
+        pairs = ", ".join("(%r, %r)" % (k, self[k]) for k in self.__keys)
+        return "SON([%s])" % pairs
+
+    def __setitem__(self, key, value):
+        if key not in self.__keys:
+            self.__keys.append(key)
+        dict.__setitem__(self, key, value)
+
+    def __delitem__(self, key):
+        # Delete from the dict first: a missing key then raises KeyError,
+        # as mappings should, and the key list is left untouched.
+        dict.__delitem__(self, key)
+        self.__keys.remove(key)
+
+    def keys(self):
+        """Return a new list of the keys in insertion order."""
+        return list(self.__keys)
+
+    def copy(self):
+        """Return a shallow copy that preserves key order."""
+        other = SON()
+        other.update(self)
+        return other
+
+    # The mapping methods below are all built on the ordered key list so
+    # that every view of the data honours insertion order.
+    def __iter__(self):
+        # keys() returns a copy, so the SON may be mutated while iterating.
+        for k in self.keys():
+            yield k
+
+    def has_key(self, key):
+        return key in self.__keys
+
+    def __contains__(self, key):
+        return key in self.__keys
+
+    def iteritems(self):
+        for k in self:
+            yield (k, self[k])
+
+    def iterkeys(self):
+        return self.__iter__()
+
+    def itervalues(self):
+        for _, v in self.items():
+            yield v
+
+    def values(self):
+        return [v for _, v in self.items()]
+
+    def items(self):
+        return [(key, self[key]) for key in self]
+
+    def clear(self):
+        for key in self.keys():
+            del self[key]
+
+    def setdefault(self, key, default=None):
+        try:
+            return self[key]
+        except KeyError:
+            self[key] = default
+        return default
+
+    def pop(self, key, *args):
+        if len(args) > 1:
+            raise TypeError("pop expected at most 2 arguments, got "
+                            + repr(1 + len(args)))
+        try:
+            value = self[key]
+        except KeyError:
+            if args:
+                return args[0]
+            raise
+        del self[key]
+        return value
+
+    def popitem(self):
+        """Remove and return the first (key, value) pair in key order."""
+        try:
+            k, v = next(iter(self.items()))
+        except StopIteration:
+            raise KeyError('container is empty')
+        del self[k]
+        return (k, v)
+
+    def update(self, other=None, **kwargs):
+        """Insert the items of `other` (mapping or pairs) and `kwargs`."""
+        # Make progressively weaker assumptions about "other"
+        if other is None:
+            pass
+        elif hasattr(other, 'items'):
+            for k, v in other.items():
+                self[k] = v
+        elif hasattr(other, 'keys'):
+            for k in list(other.keys()):
+                self[k] = other[k]
+        else:
+            for k, v in other:
+                self[k] = v
+        if kwargs:
+            self.update(kwargs)
+
+    def get(self, key, default=None):
+        try:
+            return self[key]
+        except KeyError:
+            return default
+
+    def __eq__(self, other):
+        """Comparison to another SON is order-sensitive while comparison to a
+        regular dictionary is order-insensitive.
+        """
+        if isinstance(other, SON):
+            return len(self) == len(other) and self.items() == other.items()
+        return self.to_dict() == other
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __len__(self):
+        return len(self.__keys)
+
+    def to_dict(self):
+        """Convert a SON document to a normal Python dictionary instance.
+
+        This is trickier than just *dict(...)* because it needs to be
+        recursive. Nested containers are rebuilt rather than modified in
+        place, so the values stored in this SON are left untouched.
+        """
+        def transform_value(value):
+            if isinstance(value, list):
+                return [transform_value(v) for v in value]
+            if isinstance(value, dict):
+                return dict((k, transform_value(v))
+                            for k, v in value.items())
+            return value
+
+        return transform_value(self)
+
+    def __deepcopy__(self, memo):
+        """Deep-copy the values; compiled regexes are shared, not copied."""
+        val_id = id(self)
+        if val_id in memo:
+            return memo[val_id]
+        out = memo[val_id] = SON()
+        for k, v in self.items():
+            out[k] = v if isinstance(v, RE_TYPE) else copy.deepcopy(v, memo)
+        return out
--- a/asyncio_mongo/_bson/timestamp.py
+++ b/asyncio_mongo/_bson/timestamp.py
@@ -0,0 +1,97 @@
+# Copyright 2010-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tools for representing MongoDB internal Timestamps.
+"""
+
+import calendar
+import datetime
+
+from asyncio_mongo._bson.tz_util import utc
+
+UPPERBOUND = 4294967296  # 2**32: exclusive upper limit for time and inc
+
+class Timestamp(object):
+    """MongoDB internal timestamps used in the opLog."""
+
+    def __init__(self, time, inc):
+        """Create a new :class:`Timestamp`.
+
+        This class is only for use with the MongoDB opLog. If you need
+        to store a regular timestamp, please use a
+        :class:`~datetime.datetime`.
+
+        Raises :class:`TypeError` if `time` is not an instance of
+        :class:`int` or :class:`~datetime.datetime`, or `inc` is not
+        an instance of :class:`int`. Raises :class:`ValueError` if
+        `time` or `inc` is not in [0, 2**32).
+
+        :Parameters:
+          - `time`: time in seconds since epoch UTC, or a naive UTC
+            :class:`~datetime.datetime`, or an aware
+            :class:`~datetime.datetime`
+          - `inc`: the incrementing counter
+
+        .. versionchanged:: 1.7
+           `time` can now be a :class:`~datetime.datetime` instance.
+        """
+        if isinstance(time, datetime.datetime):
+            if time.utcoffset() is not None:
+                time = time - time.utcoffset()
+            time = int(calendar.timegm(time.timetuple()))
+        if not isinstance(time, int):
+            raise TypeError("time must be an instance of int")
+        if not isinstance(inc, int):
+            raise TypeError("inc must be an instance of int")
+        if not 0 <= time < UPPERBOUND:
+            raise ValueError("time must be contained in [0, 2**32)")
+        if not 0 <= inc < UPPERBOUND:
+            raise ValueError("inc must be contained in [0, 2**32)")
+
+        self.__time = time
+        self.__inc = inc
+
+    @property
+    def time(self):
+        """Get the time portion of this :class:`Timestamp`."""
+        return self.__time
+
+    @property
+    def inc(self):
+        """Get the inc portion of this :class:`Timestamp`."""
+        return self.__inc
+
+    def __eq__(self, other):
+        if not isinstance(other, Timestamp):
+            return NotImplemented
+        return self.__time == other.time and self.__inc == other.inc
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # __eq__ alone would leave instances unhashable on Python 3.
+        return hash((self.__time, self.__inc))
+
+    def __repr__(self):
+        return "Timestamp(%s, %s)" % (self.__time, self.__inc)
+
+    def as_datetime(self):
+        """Return a :class:`~datetime.datetime` instance corresponding
+        to the time portion of this :class:`Timestamp`.
+
+        .. versionchanged:: 1.8
+           The returned datetime is now timezone aware.
+        """
+        return datetime.datetime.fromtimestamp(self.__time, utc)
--- a/asyncio_mongo/_bson/tz_util.py
+++ b/asyncio_mongo/_bson/tz_util.py
@@ -0,0 +1,52 @@
+# Copyright 2010-2012 10gen, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Timezone related utilities for BSON."""
+
+from datetime import (timedelta,
+                      tzinfo)
+
+ZERO = timedelta(0)
+
+
+class FixedOffset(tzinfo):
+    """A timezone with a constant UTC offset and no DST.
+
+    The offset is given in minutes east of UTC (or as a timedelta).
+    Modelled on the example in the Python `standard library documentation
+    <http://docs.python.org/library/datetime.html#tzinfo-objects>`_.
+    __getinitargs__ is provided so instances can be pickled / copied.
+    """
+
+    def __init__(self, offset, name):
+        if not isinstance(offset, timedelta):
+            offset = timedelta(minutes=offset)
+        self.__offset = offset
+        self.__name = name
+
+    def __getinitargs__(self):
+        return (self.__offset, self.__name)
+
+    def utcoffset(self, dt):
+        return self.__offset
+
+    def tzname(self, dt):
+        return self.__name
+
+    def dst(self, dt):
+        return ZERO
+
+
+utc = FixedOffset(0, "UTC")
+"""Fixed offset timezone representing UTC."""