diff --git a/.gitignore b/.gitignore index a281b97..44dbce3 100755 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,6 @@ _trial_temp/ *.iws .idea venv - +build +dist +*.egg-info diff --git a/README.rst b/README.rst index 50615b2..e61c028 100644 --- a/README.rst +++ b/README.rst @@ -15,6 +15,18 @@ Docs and examples ================= There are some examples in the *examples/* directory. +Features +======== +- Works for the asyncio (PEP3156) event loop +- No dependencies +- Connection pooling +- Automatic conversion from unicode (Python) to bytes (inside Redis.) +- Bytes and str protocols. +- Completely tested +- Blocking calls and transactions supported +- Streaming of some multi bulk replies +- Pubsub support + Credits ======= Thanks to (in no particular order): diff --git a/asyncio_mongo/_bson/_cbson.so b/asyncio_mongo/_bson/_cbson.so deleted file mode 100755 index 66492ea..0000000 Binary files a/asyncio_mongo/_bson/_cbson.so and /dev/null differ diff --git a/asyncio_mongo/_bson/_cbsonmodule.c b/asyncio_mongo/_bson/_cbsonmodule.c new file mode 100644 index 0000000..30c8ad6 --- /dev/null +++ b/asyncio_mongo/_bson/_cbsonmodule.c @@ -0,0 +1,2509 @@ +/* + * Copyright 2009-2012 10gen, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains C implementations of some of the functions + * needed by the bson module. If possible, these implementations + * should be used to speed up BSON encoding and decoding. 
+ */ + +#include "Python.h" +#include "datetime.h" + +#include "buffer.h" +#include "time64.h" +#include "encoding_helpers.h" + +#define _CBSON_MODULE +#include "_cbsonmodule.h" + +/* New module state and initialization code. + * See the module-initialization-and-state + * section in the following doc: + * http://docs.python.org/release/3.1.3/howto/cporting.html + * which references the following pep: + * http://www.python.org/dev/peps/pep-3121/ + * */ +struct module_state { + PyObject* Binary; + PyObject* Code; + PyObject* ObjectId; + PyObject* DBRef; + PyObject* RECompile; + PyObject* Regex; + PyObject* UUID; + PyObject* Timestamp; + PyObject* MinKey; + PyObject* MaxKey; + PyObject* UTC; + PyTypeObject* REType; +}; + +/* The Py_TYPE macro was introduced in CPython 2.6 */ +#ifndef Py_TYPE +#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + +#if PY_MAJOR_VERSION >= 3 +#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) +#else +#define GETSTATE(m) (&_state) +static struct module_state _state; +#endif + +/* Maximum number of regex flags */ +#define FLAGS_SIZE 7 + +#define JAVA_LEGACY 5 +#define CSHARP_LEGACY 6 +#define BSON_MAX_SIZE 2147483647 +/* The smallest possible BSON document, i.e. "{}" */ +#define BSON_MIN_SIZE 5 + +/* Get an error class from the bson.errors module. + * + * Returns a new ref */ +static PyObject* _error(char* name) { + PyObject* error; + PyObject* errors = PyImport_ImportModule("bson.errors"); + if (!errors) { + return NULL; + } + error = PyObject_GetAttrString(errors, name); + Py_DECREF(errors); + return error; +} + +/* Safely downcast from Py_ssize_t to int, setting an + * exception and returning -1 on error. 
*/ +static int +_downcast_and_check(Py_ssize_t size, int extra) { + if (size > BSON_MAX_SIZE || ((BSON_MAX_SIZE - extra) < size)) { + PyObject* InvalidStringData = _error("InvalidStringData"); + if (InvalidStringData) { + PyErr_SetString(InvalidStringData, + "String length must be <= 2147483647"); + Py_DECREF(InvalidStringData); + } + return -1; + } + return (int)size + extra; +} + +static PyObject* elements_to_dict(PyObject* self, const char* string, + unsigned max, PyObject* as_class, + unsigned char tz_aware, + unsigned char uuid_subtype, + unsigned char compile_re); + +static int _write_element_to_buffer(PyObject* self, buffer_t buffer, + int type_byte, PyObject* value, + unsigned char check_keys, + unsigned char uuid_subtype); + +/* Date stuff */ +static PyObject* datetime_from_millis(long long millis) { + /* To encode a datetime instance like datetime(9999, 12, 31, 23, 59, 59, 999999) + * we follow these steps: + * 1. Calculate a timestamp in seconds: 253402300799 + * 2. Multiply that by 1000: 253402300799000 + * 3. Add in microseconds divided by 1000 253402300799999 + * + * (Note: BSON doesn't support microsecond accuracy, hence the rounding.) + * + * To decode we could do: + * 1. Get seconds: timestamp / 1000: 253402300799 + * 2. Get micros: (timestamp % 1000) * 1000: 999000 + * Resulting in datetime(9999, 12, 31, 23, 59, 59, 999000) -- the expected result + * + * Now what if the we encode (1, 1, 1, 1, 1, 1, 111111)? + * 1. and 2. gives: -62135593139000 + * 3. Gives us: -62135593138889 + * + * Now decode: + * 1. Gives us: -62135593138 + * 2. 
Gives us: -889000 + * Resulting in datetime(1, 1, 1, 1, 1, 2, 15888216) -- an invalid result + * + * If instead to decode we do: + * diff = ((millis % 1000) + 1000) % 1000: 111 + * seconds = (millis - diff) / 1000: -62135593139 + * micros = diff * 1000 111000 + * Resulting in datetime(1, 1, 1, 1, 1, 1, 111000) -- the expected result + */ + int diff = (int)(((millis % 1000) + 1000) % 1000); + int microseconds = diff * 1000; + Time64_T seconds = (millis - diff) / 1000; + struct TM timeinfo; + gmtime64_r(&seconds, &timeinfo); + + return PyDateTime_FromDateAndTime(timeinfo.tm_year + 1900, + timeinfo.tm_mon + 1, + timeinfo.tm_mday, + timeinfo.tm_hour, + timeinfo.tm_min, + timeinfo.tm_sec, + microseconds); +} + +static long long millis_from_datetime(PyObject* datetime) { + struct TM timeinfo; + long long millis; + + timeinfo.tm_year = PyDateTime_GET_YEAR(datetime) - 1900; + timeinfo.tm_mon = PyDateTime_GET_MONTH(datetime) - 1; + timeinfo.tm_mday = PyDateTime_GET_DAY(datetime); + timeinfo.tm_hour = PyDateTime_DATE_GET_HOUR(datetime); + timeinfo.tm_min = PyDateTime_DATE_GET_MINUTE(datetime); + timeinfo.tm_sec = PyDateTime_DATE_GET_SECOND(datetime); + + millis = timegm64(&timeinfo) * 1000; + millis += PyDateTime_DATE_GET_MICROSECOND(datetime) / 1000; + return millis; +} + +/* Just make this compatible w/ the old API. 
*/ +int buffer_write_bytes(buffer_t buffer, const char* data, int size) { + if (buffer_write(buffer, data, size)) { + PyErr_NoMemory(); + return 0; + } + return 1; +} + +static int write_unicode(buffer_t buffer, PyObject* py_string) { + int size; + const char* data; + PyObject* encoded = PyUnicode_AsUTF8String(py_string); + if (!encoded) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + data = PyBytes_AS_STRING(encoded); +#else + data = PyString_AS_STRING(encoded); +#endif + if (!data) + goto unicodefail; + +#if PY_MAJOR_VERSION >= 3 + if ((size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1)) == -1) +#else + if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) +#endif + goto unicodefail; + + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) + goto unicodefail; + + if (!buffer_write_bytes(buffer, data, size)) + goto unicodefail; + + Py_DECREF(encoded); + return 1; + +unicodefail: + Py_DECREF(encoded); + return 0; +} + +/* returns 0 on failure */ +static int write_string(buffer_t buffer, PyObject* py_string) { + int size; + const char* data; +#if PY_MAJOR_VERSION >= 3 + if (PyUnicode_Check(py_string)){ + return write_unicode(buffer, py_string); + } + data = PyBytes_AsString(py_string); +#else + data = PyString_AsString(py_string); +#endif + if (!data) { + return 0; + } + +#if PY_MAJOR_VERSION >= 3 + if ((size = _downcast_and_check(PyBytes_Size(py_string), 1)) == -1) +#else + if ((size = _downcast_and_check(PyString_Size(py_string), 1)) == -1) +#endif + return 0; + + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (!buffer_write_bytes(buffer, data, size)) { + return 0; + } + return 1; +} + +/* + * Are we in the main interpreter or a sub-interpreter? + * Useful for deciding if we can use cached pure python + * types in mod_wsgi. 
+ */ +static int +_in_main_interpreter(void) { + static PyInterpreterState* main_interpreter = NULL; + PyInterpreterState* interpreter; + + if (main_interpreter == NULL) { + interpreter = PyInterpreterState_Head(); + + while (PyInterpreterState_Next(interpreter)) + interpreter = PyInterpreterState_Next(interpreter); + + main_interpreter = interpreter; + } + + return (main_interpreter == PyThreadState_Get()->interp); +} + +/* + * Get a reference to a pure python type. If we are in the + * main interpreter return the cached object, otherwise import + * the object we need and return it instead. + */ +static PyObject* +_get_object(PyObject* object, char* module_name, char* object_name) { + if (_in_main_interpreter()) { + Py_XINCREF(object); + return object; + } else { + PyObject* imported = NULL; + PyObject* module = PyImport_ImportModule(module_name); + if (!module) + return NULL; + imported = PyObject_GetAttrString(module, object_name); + Py_DECREF(module); + return imported; + } +} + +/* Load a Python object to cache. + * + * Returns non-zero on failure. */ +static int _load_object(PyObject** object, char* module_name, char* object_name) { + PyObject* module; + + module = PyImport_ImportModule(module_name); + if (!module) { + return 1; + } + + *object = PyObject_GetAttrString(module, object_name); + Py_DECREF(module); + + return (*object) ? 0 : 2; +} + +/* Load all Python objects to cache. + * + * Returns non-zero on failure. 
*/ +static int _load_python_objects(PyObject* module) { + PyObject* empty_string; + PyObject* compiled; + struct module_state *state = GETSTATE(module); + + if (_load_object(&state->Binary, "bson.binary", "Binary") || + _load_object(&state->Code, "bson.code", "Code") || + _load_object(&state->ObjectId, "bson.objectid", "ObjectId") || + _load_object(&state->DBRef, "bson.dbref", "DBRef") || + _load_object(&state->Timestamp, "bson.timestamp", "Timestamp") || + _load_object(&state->MinKey, "bson.min_key", "MinKey") || + _load_object(&state->MaxKey, "bson.max_key", "MaxKey") || + _load_object(&state->UTC, "bson.tz_util", "utc") || + _load_object(&state->RECompile, "re", "compile") || + _load_object(&state->Regex, "bson.regex", "Regex")) { + return 1; + } + /* If we couldn't import uuid then we must be on 2.4. Just ignore. */ + if (_load_object(&state->UUID, "uuid", "UUID") == 1) { + state->UUID = NULL; + PyErr_Clear(); + } + /* Reload our REType hack too. */ +#if PY_MAJOR_VERSION >= 3 + empty_string = PyBytes_FromString(""); +#else + empty_string = PyString_FromString(""); +#endif + if (empty_string == NULL) { + state->REType = NULL; + return 1; + } + compiled = PyObject_CallFunction(state->RECompile, "O", empty_string); + if (compiled == NULL) { + state->REType = NULL; + Py_DECREF(empty_string); + return 1; + } + Py_INCREF(Py_TYPE(compiled)); + state->REType = Py_TYPE(compiled); + Py_DECREF(empty_string); + Py_DECREF(compiled); + return 0; +} + +static int write_element_to_buffer(PyObject* self, buffer_t buffer, + int type_byte, PyObject* value, + unsigned char check_keys, + unsigned char uuid_subtype) { + int result; + if(Py_EnterRecursiveCall(" while encoding an object to BSON ")) + return 0; + result = _write_element_to_buffer(self, buffer, type_byte, + value, check_keys, uuid_subtype); + Py_LeaveRecursiveCall(); + return result; +} + +static void +_fix_java(const char* in, char* out) { + int i, j; + for (i = 0, j = 7; i < j; i++, j--) { + out[i] = in[j]; + out[j] = 
in[i]; + } + for (i = 8, j = 15; i < j; i++, j--) { + out[i] = in[j]; + out[j] = in[i]; + } +} + +static void +_set_cannot_encode(PyObject* value) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyObject* repr = PyObject_Repr(value); + if (repr) { +#if PY_MAJOR_VERSION >= 3 + PyObject* errmsg = PyUnicode_FromString("Cannot encode object: "); +#else + PyObject* errmsg = PyString_FromString("Cannot encode object: "); +#endif + if (errmsg) { +#if PY_MAJOR_VERSION >= 3 + PyObject* error = PyUnicode_Concat(errmsg, repr); + if (error) { + PyErr_SetObject(InvalidDocument, error); + Py_DECREF(error); + } + Py_DECREF(errmsg); + Py_DECREF(repr); +#else + PyString_ConcatAndDel(&errmsg, repr); + if (errmsg) { + PyErr_SetObject(InvalidDocument, errmsg); + Py_DECREF(errmsg); + } +#endif + } else { + Py_DECREF(repr); + } + } + Py_DECREF(InvalidDocument); + } +} + +/* + * Encode a builtin Python regular expression or our custom Regex class. + * + * Sets exception and returns 0 on failure. + */ +static int _write_regex_to_buffer( + buffer_t buffer, int type_byte, PyObject* value) { + + PyObject* py_flags; + PyObject* py_pattern; + PyObject* encoded_pattern; + long int_flags; + char flags[FLAGS_SIZE]; + char check_utf8 = 0; + const char* pattern_data; + int pattern_length, flags_length; + result_t status; + + /* + * Both the builtin re type and our Regex class have attributes + * "flags" and "pattern". 
+ */ + py_flags = PyObject_GetAttrString(value, "flags"); + if (!py_flags) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + int_flags = PyLong_AsLong(py_flags); +#else + int_flags = PyInt_AsLong(py_flags); +#endif + Py_DECREF(py_flags); + py_pattern = PyObject_GetAttrString(value, "pattern"); + if (!py_pattern) { + return 0; + } + + if (PyUnicode_Check(py_pattern)) { + encoded_pattern = PyUnicode_AsUTF8String(py_pattern); + Py_DECREF(py_pattern); + if (!encoded_pattern) { + return 0; + } + } else { + encoded_pattern = py_pattern; + check_utf8 = 1; + } + +#if PY_MAJOR_VERSION >= 3 + if (!(pattern_data = PyBytes_AsString(encoded_pattern))) { + Py_DECREF(encoded_pattern); + return 0; + } + if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) { + Py_DECREF(encoded_pattern); + return 0; + } +#else + if (!(pattern_data = PyString_AsString(encoded_pattern))) { + Py_DECREF(encoded_pattern); + return 0; + } + if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) { + Py_DECREF(encoded_pattern); + return 0; + } +#endif + status = check_string((const unsigned char*)pattern_data, + pattern_length, check_utf8, 1); + if (status == NOT_UTF_8) { + PyObject* InvalidStringData = _error("InvalidStringData"); + if (InvalidStringData) { + PyErr_SetString(InvalidStringData, + "regex patterns must be valid UTF-8"); + Py_DECREF(InvalidStringData); + } + Py_DECREF(encoded_pattern); + return 0; + } else if (status == HAS_NULL) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyErr_SetString(InvalidDocument, + "regex patterns must not contain the NULL byte"); + Py_DECREF(InvalidDocument); + } + Py_DECREF(encoded_pattern); + return 0; + } + + if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) { + Py_DECREF(encoded_pattern); + return 0; + } + Py_DECREF(encoded_pattern); + + flags[0] = 0; + + if (int_flags & 2) { + STRCAT(flags, FLAGS_SIZE, "i"); + } + if (int_flags & 4) { + 
STRCAT(flags, FLAGS_SIZE, "l"); + } + if (int_flags & 8) { + STRCAT(flags, FLAGS_SIZE, "m"); + } + if (int_flags & 16) { + STRCAT(flags, FLAGS_SIZE, "s"); + } + if (int_flags & 32) { + STRCAT(flags, FLAGS_SIZE, "u"); + } + if (int_flags & 64) { + STRCAT(flags, FLAGS_SIZE, "x"); + } + flags_length = (int)strlen(flags) + 1; + if (!buffer_write_bytes(buffer, flags, flags_length)) { + return 0; + } + *(buffer_get_buffer(buffer) + type_byte) = 0x0B; + return 1; +} + +/* TODO our platform better be little-endian w/ 4-byte ints! */ +/* Write a single value to the buffer (also write its type_byte, for which + * space has already been reserved. + * + * returns 0 on failure */ +static int _write_element_to_buffer(PyObject* self, buffer_t buffer, + int type_byte, PyObject* value, + unsigned char check_keys, + unsigned char uuid_subtype) { + struct module_state *state = GETSTATE(self); + + /* + * Don't use PyObject_IsInstance for our custom types. It causes + * problems with python sub interpreters. Our custom types should + * have a _type_marker attribute, which we can switch on instead. + */ + if (PyObject_HasAttrString(value, "_type_marker")) { + long type; + PyObject* type_marker = PyObject_GetAttrString(value, "_type_marker"); + if (type_marker == NULL) + return 0; +#if PY_MAJOR_VERSION >= 3 + type = PyLong_AsLong(type_marker); +#else + type = PyInt_AsLong(type_marker); +#endif + Py_DECREF(type_marker); + /* + * Py(Long|Int)_AsLong returns -1 for error but -1 is a valid value + * so we call PyErr_Occurred to differentiate. + * + * One potential reason for an error is the user passing an invalid + * type that overrides __getattr__ (e.g. 
pymongo.collection.Collection) + */ + if (type == -1 && PyErr_Occurred()) { + PyErr_Clear(); + _set_cannot_encode(value); + return 0; + } + switch (type) { + case 5: + { + /* Binary */ + PyObject* subtype_object; + long subtype; + const char* data; + int size; + + *(buffer_get_buffer(buffer) + type_byte) = 0x05; + subtype_object = PyObject_GetAttrString(value, "subtype"); + if (!subtype_object) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + subtype = PyLong_AsLong(subtype_object); +#else + subtype = PyInt_AsLong(subtype_object); +#endif + if (subtype == -1) { + Py_DECREF(subtype_object); + return 0; + } +#if PY_MAJOR_VERSION >= 3 + size = _downcast_and_check(PyBytes_Size(value), 0); +#else + size = _downcast_and_check(PyString_Size(value), 0); +#endif + if (size == -1) { + Py_DECREF(subtype_object); + return 0; + } + + Py_DECREF(subtype_object); + if (subtype == 2) { +#if PY_MAJOR_VERSION >= 3 + int other_size = _downcast_and_check(PyBytes_Size(value), 4); +#else + int other_size = _downcast_and_check(PyString_Size(value), 4); +#endif + if (other_size == -1) + return 0; + if (!buffer_write_bytes(buffer, (const char*)&other_size, 4)) { + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { + return 0; + } + } + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (subtype != 2) { + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { + return 0; + } + } +#if PY_MAJOR_VERSION >= 3 + data = PyBytes_AsString(value); +#else + data = PyString_AsString(value); +#endif + if (!data) { + return 0; + } + if (!buffer_write_bytes(buffer, data, size)) { + return 0; + } + return 1; + } + case 7: + { + /* ObjectId */ + const char* data; + PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id"); + if (!pystring) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + data = PyBytes_AsString(pystring); +#else + data = PyString_AsString(pystring); +#endif + if (!data) { + Py_DECREF(pystring); + return 0; + } + if 
(!buffer_write_bytes(buffer, data, 12)) { + Py_DECREF(pystring); + return 0; + } + Py_DECREF(pystring); + *(buffer_get_buffer(buffer) + type_byte) = 0x07; + return 1; + } + case 11: + { + /* Regex */ + return _write_regex_to_buffer(buffer, type_byte, value); + } + case 13: + { + /* Code */ + int start_position, + length_location, + length; + + PyObject* scope = PyObject_GetAttrString(value, "scope"); + if (!scope) { + return 0; + } + + if (!PyDict_Size(scope)) { + Py_DECREF(scope); + *(buffer_get_buffer(buffer) + type_byte) = 0x0D; + return write_string(buffer, value); + } + + *(buffer_get_buffer(buffer) + type_byte) = 0x0F; + + start_position = buffer_get_position(buffer); + /* save space for length */ + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyErr_NoMemory(); + Py_DECREF(scope); + return 0; + } + + if (!write_string(buffer, value)) { + Py_DECREF(scope); + return 0; + } + + if (!write_dict(self, buffer, scope, 0, uuid_subtype, 0)) { + Py_DECREF(scope); + return 0; + } + Py_DECREF(scope); + + length = buffer_get_position(buffer) - start_position; + memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); + return 1; + } + case 17: + { + /* Timestamp */ + PyObject* obj; + long i; + + obj = PyObject_GetAttrString(value, "inc"); + if (!obj) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + i = PyLong_AsLong(obj); +#else + i = PyInt_AsLong(obj); +#endif + Py_DECREF(obj); + if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { + return 0; + } + + obj = PyObject_GetAttrString(value, "time"); + if (!obj) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + i = PyLong_AsLong(obj); +#else + i = PyInt_AsLong(obj); +#endif + Py_DECREF(obj); + if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { + return 0; + } + + *(buffer_get_buffer(buffer) + type_byte) = 0x11; + return 1; + } + case 100: + { + /* DBRef */ + PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL); + if (!as_doc) { + return 0; + } + if (!write_dict(self, 
buffer, as_doc, 0, uuid_subtype, 0)) { + Py_DECREF(as_doc); + return 0; + } + Py_DECREF(as_doc); + *(buffer_get_buffer(buffer) + type_byte) = 0x03; + return 1; + } + case 255: + { + /* MinKey */ + *(buffer_get_buffer(buffer) + type_byte) = 0xFF; + return 1; + } + case 127: + { + /* MaxKey */ + *(buffer_get_buffer(buffer) + type_byte) = 0x7F; + return 1; + } + } + } + + /* No _type_marker attibute or not one of our types. */ + + if (PyBool_Check(value)) { +#if PY_MAJOR_VERSION >= 3 + const long bool = PyLong_AsLong(value); +#else + const long bool = PyInt_AsLong(value); +#endif + const char c = bool ? 0x01 : 0x00; + *(buffer_get_buffer(buffer) + type_byte) = 0x08; + return buffer_write_bytes(buffer, &c, 1); + } +#if PY_MAJOR_VERSION >= 3 + else if (PyLong_Check(value)) { + const long long_value = PyLong_AsLong(value); +#else + else if (PyInt_Check(value)) { + const long long_value = PyInt_AsLong(value); +#endif + + const int int_value = (int)long_value; + if (PyErr_Occurred() || long_value != int_value) { /* Overflow */ + long long long_long_value; + PyErr_Clear(); + long_long_value = PyLong_AsLongLong(value); + if (PyErr_Occurred()) { /* Overflow AGAIN */ + PyErr_SetString(PyExc_OverflowError, + "MongoDB can only handle up to 8-byte ints"); + return 0; + } + *(buffer_get_buffer(buffer) + type_byte) = 0x12; + return buffer_write_bytes(buffer, (const char*)&long_long_value, 8); + } + *(buffer_get_buffer(buffer) + type_byte) = 0x10; + return buffer_write_bytes(buffer, (const char*)&int_value, 4); +#if PY_MAJOR_VERSION < 3 + } else if (PyLong_Check(value)) { + const long long long_long_value = PyLong_AsLongLong(value); + if (PyErr_Occurred()) { /* Overflow */ + PyErr_SetString(PyExc_OverflowError, + "MongoDB can only handle up to 8-byte ints"); + return 0; + } + *(buffer_get_buffer(buffer) + type_byte) = 0x12; + return buffer_write_bytes(buffer, (const char*)&long_long_value, 8); +#endif + } else if (PyFloat_Check(value)) { + const double d = PyFloat_AsDouble(value); + 
*(buffer_get_buffer(buffer) + type_byte) = 0x01; + return buffer_write_bytes(buffer, (const char*)&d, 8); + } else if (value == Py_None) { + *(buffer_get_buffer(buffer) + type_byte) = 0x0A; + return 1; + } else if (PyDict_Check(value)) { + *(buffer_get_buffer(buffer) + type_byte) = 0x03; + return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); + } else if (PyList_Check(value) || PyTuple_Check(value)) { + Py_ssize_t items, i; + int start_position, + length_location, + length; + char zero = 0; + + *(buffer_get_buffer(buffer) + type_byte) = 0x04; + start_position = buffer_get_position(buffer); + + /* save space for length */ + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyErr_NoMemory(); + return 0; + } + + if ((items = PySequence_Size(value)) > BSON_MAX_SIZE) { + PyObject* BSONError = _error("BSONError"); + if (BSONError) { + PyErr_SetString(BSONError, + "Too many items to serialize."); + Py_DECREF(BSONError); + } + return 0; + } + for(i = 0; i < items; i++) { + int list_type_byte = buffer_save_space(buffer, 1); + char name[16]; + PyObject* item_value; + + if (list_type_byte == -1) { + PyErr_NoMemory(); + return 0; + } + INT2STRING(name, (int)i); + if (!buffer_write_bytes(buffer, name, (int)strlen(name) + 1)) { + return 0; + } + + if (!(item_value = PySequence_GetItem(value, i))) + return 0; + if (!write_element_to_buffer(self, buffer, list_type_byte, + item_value, check_keys, uuid_subtype)) { + Py_DECREF(item_value); + return 0; + } + Py_DECREF(item_value); + } + + /* write null byte and fill in length */ + if (!buffer_write_bytes(buffer, &zero, 1)) { + return 0; + } + length = buffer_get_position(buffer) - start_position; + memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); + return 1; +#if PY_MAJOR_VERSION >= 3 + /* Python3 special case. Store bytes as BSON binary subtype 0. 
*/ + } else if (PyBytes_Check(value)) { + int subtype = 0; + int size; + const char* data = PyBytes_AS_STRING(value); + if (!data) + return 0; + if ((size = _downcast_and_check(PyBytes_GET_SIZE(value), 0)) == -1) + return 0; + *(buffer_get_buffer(buffer) + type_byte) = 0x05; + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { + return 0; + } + if (!buffer_write_bytes(buffer, data, size)) { + return 0; + } + return 1; +#else + /* PyString_Check only works in Python 2.x. */ + } else if (PyString_Check(value)) { + result_t status; + const char* data; + int size; + if (!(data = PyString_AS_STRING(value))) + return 0; + if ((size = _downcast_and_check(PyString_GET_SIZE(value), 1)) == -1) + return 0; + *(buffer_get_buffer(buffer) + type_byte) = 0x02; + status = check_string((const unsigned char*)data, size - 1, 1, 0); + + if (status == NOT_UTF_8) { + PyObject* InvalidStringData = _error("InvalidStringData"); + if (InvalidStringData) { + PyObject* repr = PyObject_Repr(value); + char* repr_as_cstr = repr ? PyString_AsString(repr) : NULL; + if (repr_as_cstr) { + PyObject *message = PyString_FromFormat( + "strings in documents must be valid UTF-8: %s", + repr_as_cstr); + + if (message) { + PyErr_SetObject(InvalidStringData, message); + Py_DECREF(message); + } + } else { + /* repr(value) failed, use a generic message. 
*/ + PyErr_SetString( + InvalidStringData, + "strings in documents must be valid UTF-8"); + } + Py_XDECREF(repr); + Py_DECREF(InvalidStringData); + } + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (!buffer_write_bytes(buffer, data, size)) { + return 0; + } + return 1; +#endif + } else if (PyUnicode_Check(value)) { + *(buffer_get_buffer(buffer) + type_byte) = 0x02; + return write_unicode(buffer, value); + } else if (PyDateTime_Check(value)) { + long long millis; + PyObject* utcoffset = PyObject_CallMethod(value, "utcoffset", NULL); + if (utcoffset == NULL) + return 0; + if (utcoffset != Py_None) { + PyObject* result = PyNumber_Subtract(value, utcoffset); + Py_DECREF(utcoffset); + if (!result) { + return 0; + } + millis = millis_from_datetime(result); + Py_DECREF(result); + } else { + millis = millis_from_datetime(value); + } + *(buffer_get_buffer(buffer) + type_byte) = 0x09; + return buffer_write_bytes(buffer, (const char*)&millis, 8); + } else if (PyObject_TypeCheck(value, state->REType)) { + return _write_regex_to_buffer(buffer, type_byte, value); + } + + /* + * Try UUID last since we have to import + * it if we're in a sub-interpreter. + * + * If we're running under python 2.4 there likely + * isn't a uuid module. + */ + if (state->UUID) { + PyObject* uuid_type = _get_object(state->UUID, "uuid", "UUID"); + if (uuid_type && PyObject_IsInstance(value, uuid_type)) { + /* Just a special case of Binary above, but + * simpler to do as a separate case. */ + PyObject* bytes; + /* Could be bytes, bytearray, str... 
*/ + const char* data; + /* UUID is always 16 bytes */ + int size = 16; + int subtype; + + Py_DECREF(uuid_type); + + if (uuid_subtype == JAVA_LEGACY || uuid_subtype == CSHARP_LEGACY) { + subtype = 3; + } + else { + subtype = uuid_subtype; + } + + *(buffer_get_buffer(buffer) + type_byte) = 0x05; + if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { + return 0; + } + + if (uuid_subtype == CSHARP_LEGACY) { + /* Legacy C# byte order */ + bytes = PyObject_GetAttrString(value, "bytes_le"); + } + else { + bytes = PyObject_GetAttrString(value, "bytes"); + } + if (!bytes) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + /* Work around http://bugs.python.org/issue7380 */ + if (PyByteArray_Check(bytes)) { + data = PyByteArray_AsString(bytes); + } + else { + data = PyBytes_AsString(bytes); + } +#else + data = PyString_AsString(bytes); +#endif + if (data == NULL) { + Py_DECREF(bytes); + return 0; + } + if (uuid_subtype == JAVA_LEGACY) { + /* Store in legacy java byte order. */ + char as_legacy_java[16]; + _fix_java(data, as_legacy_java); + if (!buffer_write_bytes(buffer, as_legacy_java, size)) { + Py_DECREF(bytes); + return 0; + } + } + else { + if (!buffer_write_bytes(buffer, data, size)) { + Py_DECREF(bytes); + return 0; + } + } + Py_DECREF(bytes); + return 1; + } else { + Py_XDECREF(uuid_type); + } + } + /* We can't determine value's type. Fail. 
*/ + _set_cannot_encode(value); + return 0; +} + +static int check_key_name(const char* name, int name_length) { + + if (name_length > 0 && name[0] == '$') { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { +#if PY_MAJOR_VERSION >= 3 + PyObject* errmsg = PyUnicode_FromFormat( + "key '%s' must not start with '$'", name); +#else + PyObject* errmsg = PyString_FromFormat( + "key '%s' must not start with '$'", name); +#endif + if (errmsg) { + PyErr_SetObject(InvalidDocument, errmsg); + Py_DECREF(errmsg); + } + Py_DECREF(InvalidDocument); + } + return 0; + } + if (strchr(name, '.')) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { +#if PY_MAJOR_VERSION >= 3 + PyObject* errmsg = PyUnicode_FromFormat( + "key '%s' must not contain '.'", name); +#else + PyObject* errmsg = PyString_FromFormat( + "key '%s' must not contain '.'", name); +#endif + if (errmsg) { + PyErr_SetObject(InvalidDocument, errmsg); + Py_DECREF(errmsg); + } + Py_DECREF(InvalidDocument); + } + return 0; + } + return 1; +} + +/* Write a (key, value) pair to the buffer. + * + * Returns 0 on failure */ +int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_length, + PyObject* value, unsigned char check_keys, + unsigned char uuid_subtype, unsigned char allow_id) { + int type_byte; + + /* Don't write any _id elements unless we're explicitly told to - + * _id has to be written first so we do so, but don't bother + * deleting it from the dictionary being written. 
*/ + if (!allow_id && strcmp(name, "_id") == 0) { + return 1; + } + + type_byte = buffer_save_space(buffer, 1); + if (type_byte == -1) { + PyErr_NoMemory(); + return 0; + } + if (check_keys && !check_key_name(name, name_length)) { + return 0; + } + if (!buffer_write_bytes(buffer, name, name_length + 1)) { + return 0; + } + if (!write_element_to_buffer(self, buffer, type_byte, + value, check_keys, uuid_subtype)) { + return 0; + } + return 1; +} + +int decode_and_write_pair(PyObject* self, buffer_t buffer, + PyObject* key, PyObject* value, + unsigned char check_keys, + unsigned char uuid_subtype, unsigned char top_level) { + PyObject* encoded; + const char* data; + int size; + if (PyUnicode_Check(key)) { + encoded = PyUnicode_AsUTF8String(key); + if (!encoded) { + return 0; + } +#if PY_MAJOR_VERSION >= 3 + if (!(data = PyBytes_AS_STRING(encoded))) { + Py_DECREF(encoded); + return 0; + } + if ((size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1)) == -1) { + Py_DECREF(encoded); + return 0; + } +#else + if (!(data = PyString_AS_STRING(encoded))) { + Py_DECREF(encoded); + return 0; + } + if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) { + Py_DECREF(encoded); + return 0; + } +#endif + if (strlen(data) != (size_t)(size - 1)) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyErr_SetString(InvalidDocument, + "Key names must not contain the NULL byte"); + Py_DECREF(InvalidDocument); + } + Py_DECREF(encoded); + return 0; + } +#if PY_MAJOR_VERSION < 3 + } else if (PyString_Check(key)) { + result_t status; + encoded = key; + Py_INCREF(encoded); + + if (!(data = PyString_AS_STRING(encoded))) { + Py_DECREF(encoded); + return 0; + } + if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) { + Py_DECREF(encoded); + return 0; + } + status = check_string((const unsigned char*)data, size - 1, 1, 1); + + if (status == NOT_UTF_8) { + PyObject* InvalidStringData = _error("InvalidStringData"); + if 
(InvalidStringData) { + PyErr_SetString(InvalidStringData, + "strings in documents must be valid UTF-8"); + Py_DECREF(InvalidStringData); + } + Py_DECREF(encoded); + return 0; + } else if (status == HAS_NULL) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyErr_SetString(InvalidDocument, + "Key names must not contain the NULL byte"); + Py_DECREF(InvalidDocument); + } + Py_DECREF(encoded); + return 0; + } +#endif + } else { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyObject* repr = PyObject_Repr(key); + if (repr) { +#if PY_MAJOR_VERSION >= 3 + PyObject* errmsg = PyUnicode_FromString( + "documents must have only string keys, key was "); +#else + PyObject* errmsg = PyString_FromString( + "documents must have only string keys, key was "); +#endif + if (errmsg) { +#if PY_MAJOR_VERSION >= 3 + PyObject* error = PyUnicode_Concat(errmsg, repr); + if (error) { + PyErr_SetObject(InvalidDocument, error); + Py_DECREF(error); + } + Py_DECREF(errmsg); + Py_DECREF(repr); +#else + PyString_ConcatAndDel(&errmsg, repr); + if (errmsg) { + PyErr_SetObject(InvalidDocument, errmsg); + Py_DECREF(errmsg); + } +#endif + } else { + Py_DECREF(repr); + } + } + Py_DECREF(InvalidDocument); + } + return 0; + } + + /* If top_level is True, don't allow writing _id here - it was already written. 
*/ + if (!write_pair(self, buffer, data, + size - 1, value, check_keys, uuid_subtype, !top_level)) { + Py_DECREF(encoded); + return 0; + } + + Py_DECREF(encoded); + return 1; +} + +/* returns 0 on failure */ +int write_dict(PyObject* self, buffer_t buffer, + PyObject* dict, unsigned char check_keys, + unsigned char uuid_subtype, unsigned char top_level) { + PyObject* key; + PyObject* iter; + char zero = 0; + int length; + int length_location; + + if (!PyDict_Check(dict)) { + PyObject* repr = PyObject_Repr(dict); + if (repr) { +#if PY_MAJOR_VERSION >= 3 + PyObject* errmsg = PyUnicode_FromString( + "encoder expected a mapping type but got: "); + if (errmsg) { + PyObject* error = PyUnicode_Concat(errmsg, repr); + if (error) { + PyErr_SetObject(PyExc_TypeError, error); + Py_DECREF(error); + } + Py_DECREF(errmsg); + Py_DECREF(repr); + } +#else + PyObject* errmsg = PyString_FromString( + "encoder expected a mapping type but got: "); + if (errmsg) { + PyString_ConcatAndDel(&errmsg, repr); + if (errmsg) { + PyErr_SetObject(PyExc_TypeError, errmsg); + Py_DECREF(errmsg); + } + } +#endif + else { + Py_DECREF(repr); + } + } else { + PyErr_SetString(PyExc_TypeError, + "encoder expected a mapping type"); + } + return 0; + } + + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyErr_NoMemory(); + return 0; + } + + /* Write _id first if this is a top level doc. 
*/ + if (top_level) { + PyObject* _id = PyDict_GetItemString(dict, "_id"); + if (_id) { + if (!write_pair(self, buffer, "_id", 3, + _id, check_keys, uuid_subtype, 1)) { + return 0; + } + } + } + + iter = PyObject_GetIter(dict); + if (iter == NULL) { + return 0; + } + while ((key = PyIter_Next(iter)) != NULL) { + PyObject* value = PyDict_GetItem(dict, key); + if (!value) { + PyErr_SetObject(PyExc_KeyError, key); + Py_DECREF(key); + Py_DECREF(iter); + return 0; + } + if (!decode_and_write_pair(self, buffer, key, value, + check_keys, uuid_subtype, top_level)) { + Py_DECREF(key); + Py_DECREF(iter); + return 0; + } + Py_DECREF(key); + } + Py_DECREF(iter); + + /* write null byte and fill in length */ + if (!buffer_write_bytes(buffer, &zero, 1)) { + return 0; + } + length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); + return 1; +} + +static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { + PyObject* dict; + PyObject* result; + unsigned char check_keys; + unsigned char uuid_subtype; + unsigned char top_level = 1; + buffer_t buffer; + + if (!PyArg_ParseTuple(args, "Obb|b", &dict, + &check_keys, &uuid_subtype, &top_level)) { + return NULL; + } + + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + return NULL; + } + + if (!write_dict(self, buffer, dict, check_keys, uuid_subtype, top_level)) { + buffer_free(buffer); + return NULL; + } + + /* objectify buffer */ +#if PY_MAJOR_VERSION >= 3 + result = Py_BuildValue("y#", buffer_get_buffer(buffer), + buffer_get_position(buffer)); +#else + result = Py_BuildValue("s#", buffer_get_buffer(buffer), + buffer_get_position(buffer)); +#endif + buffer_free(buffer); + return result; +} + +static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position, + unsigned char type, unsigned max, PyObject* as_class, + unsigned char tz_aware, unsigned char uuid_subtype, + unsigned char compile_re) { + struct module_state *state = 
GETSTATE(self); + + PyObject* value = NULL; + switch (type) { + case 1: + { + double d; + if (max < 8) { + goto invalid; + } + memcpy(&d, buffer + *position, 8); + value = PyFloat_FromDouble(d); + *position += 8; + break; + } + case 2: + case 14: + { + unsigned value_length; + if (max < 4) { + goto invalid; + } + memcpy(&value_length, buffer + *position, 4); + /* Encoded string length + string */ + if (!value_length || max < value_length || max < 4 + value_length) { + goto invalid; + } + *position += 4; + /* Strings must end in \0 */ + if (buffer[*position + value_length - 1]) { + goto invalid; + } + value = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict"); + if (!value) { + goto invalid; + } + *position += value_length; + break; + } + case 3: + { + PyObject* collection; + unsigned size; + if (max < 4) { + goto invalid; + } + memcpy(&size, buffer + *position, 4); + if (size < BSON_MIN_SIZE || max < size) { + goto invalid; + } + /* Check for bad eoo */ + if (buffer[*position + size - 1]) { + goto invalid; + } + value = elements_to_dict(self, buffer + *position + 4, + size - 5, as_class, tz_aware, uuid_subtype, + compile_re); + if (!value) { + goto invalid; + } + + /* Decoding for DBRefs */ + collection = PyDict_GetItemString(value, "$ref"); + if (collection) { /* DBRef */ + PyObject* dbref = NULL; + PyObject* dbref_type; + PyObject* id; + PyObject* database; + + Py_INCREF(collection); + PyDict_DelItemString(value, "$ref"); + + id = PyDict_GetItemString(value, "$id"); + if (id == NULL) { + id = Py_None; + Py_INCREF(id); + } else { + Py_INCREF(id); + PyDict_DelItemString(value, "$id"); + } + + database = PyDict_GetItemString(value, "$db"); + if (database == NULL) { + database = Py_None; + Py_INCREF(database); + } else { + Py_INCREF(database); + PyDict_DelItemString(value, "$db"); + } + + if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) { + dbref = PyObject_CallFunctionObjArgs(dbref_type, collection, id, database, value, NULL); + 
Py_DECREF(dbref_type); + } + Py_DECREF(value); + value = dbref; + + Py_DECREF(id); + Py_DECREF(collection); + Py_DECREF(database); + } + + *position += size; + break; + } + case 4: + { + unsigned size, end; + + if (max < 4) { + goto invalid; + } + memcpy(&size, buffer + *position, 4); + if (size < BSON_MIN_SIZE || max < size) { + goto invalid; + } + end = *position + size - 1; + /* Check for bad eoo */ + if (buffer[end]) { + goto invalid; + } + *position += 4; + + value = PyList_New(0); + if (!value) { + goto invalid; + } + while (*position < end) { + PyObject* to_append; + + unsigned char bson_type = (unsigned char)buffer[(*position)++]; + + size_t key_size = strlen(buffer + *position); + if (max < key_size) { + Py_DECREF(value); + goto invalid; + } + /* just skip the key, they're in order. */ + *position += (unsigned)key_size + 1; + if (Py_EnterRecursiveCall(" while decoding a list value")) { + Py_DECREF(value); + goto invalid; + } + to_append = get_value(self, buffer, position, bson_type, + max - (unsigned)key_size, + as_class, tz_aware, uuid_subtype, + compile_re); + Py_LeaveRecursiveCall(); + if (!to_append) { + Py_DECREF(value); + goto invalid; + } + PyList_Append(value, to_append); + Py_DECREF(to_append); + } + (*position)++; + break; + } + case 5: + { + PyObject* data; + PyObject* st; + PyObject* type_to_create; + unsigned length; + unsigned char subtype; + + if (max < 5) { + goto invalid; + } + memcpy(&length, buffer + *position, 4); + if (max < length) { + goto invalid; + } + + subtype = (unsigned char)buffer[*position + 4]; + *position += 5; + if (subtype == 2 && length < 4) { + goto invalid; + } +#if PY_MAJOR_VERSION >= 3 + /* Python3 special case. Decode BSON binary subtype 0 to bytes. 
*/ + if (subtype == 0) { + value = PyBytes_FromStringAndSize(buffer + *position, length); + *position += length; + break; + } + if (subtype == 2) { + data = PyBytes_FromStringAndSize(buffer + *position + 4, length - 4); + } else { + data = PyBytes_FromStringAndSize(buffer + *position, length); + } +#else + if (subtype == 2) { + data = PyString_FromStringAndSize(buffer + *position + 4, length - 4); + } else { + data = PyString_FromStringAndSize(buffer + *position, length); + } +#endif + if (!data) { + goto invalid; + } + /* Encode as UUID, not Binary */ + if ((subtype == 3 || subtype == 4) && state->UUID) { + PyObject* kwargs; + PyObject* args = PyTuple_New(0); + /* UUID should always be 16 bytes */ + if (!args || length != 16) { + Py_DECREF(data); + goto invalid; + } + kwargs = PyDict_New(); + if (!kwargs) { + Py_DECREF(data); + Py_DECREF(args); + goto invalid; + } + + /* + * From this point, we hold refs to args, kwargs, and data. + * If anything fails, goto uuiderror to clean them up. 
+ */ + if (uuid_subtype == CSHARP_LEGACY) { + /* Legacy C# byte order */ + if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) + goto uuiderror; + } + else { + if (uuid_subtype == JAVA_LEGACY) { + /* Convert from legacy java byte order */ + char big_endian[16]; + _fix_java(buffer + *position, big_endian); + /* Free the previously created PyString object */ + Py_DECREF(data); +#if PY_MAJOR_VERSION >= 3 + data = PyBytes_FromStringAndSize(big_endian, length); +#else + data = PyString_FromStringAndSize(big_endian, length); +#endif + if (data == NULL) + goto uuiderror; + } + if ((PyDict_SetItemString(kwargs, "bytes", data)) == -1) + goto uuiderror; + + } + if ((type_to_create = _get_object(state->UUID, "uuid", "UUID"))) { + value = PyObject_Call(type_to_create, args, kwargs); + Py_DECREF(type_to_create); + } + + Py_DECREF(args); + Py_DECREF(kwargs); + Py_DECREF(data); + if (!value) { + goto invalid; + } + + *position += length; + break; + + uuiderror: + Py_DECREF(args); + Py_DECREF(kwargs); + Py_XDECREF(data); + goto invalid; + } + +#if PY_MAJOR_VERSION >= 3 + st = PyLong_FromLong(subtype); +#else + st = PyInt_FromLong(subtype); +#endif + if (!st) { + Py_DECREF(data); + goto invalid; + } + if ((type_to_create = _get_object(state->Binary, "bson.binary", "Binary"))) { + value = PyObject_CallFunctionObjArgs(type_to_create, data, st, NULL); + Py_DECREF(type_to_create); + } + Py_DECREF(st); + Py_DECREF(data); + if (!value) { + goto invalid; + } + *position += length; + break; + } + case 6: + case 10: + { + value = Py_None; + Py_INCREF(value); + break; + } + case 7: + { + PyObject* objectid_type; + if (max < 12) { + goto invalid; + } + if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { +#if PY_MAJOR_VERSION >= 3 + value = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); +#else + value = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); +#endif + Py_DECREF(objectid_type); + } + *position += 12; + 
break; + } + case 8: + { + value = buffer[(*position)++] ? Py_True : Py_False; + Py_INCREF(value); + break; + } + case 9: + { + PyObject* utc_type; + PyObject* naive; + PyObject* replace; + PyObject* args; + PyObject* kwargs; + long long millis; + if (max < 8) { + goto invalid; + } + memcpy(&millis, buffer + *position, 8); + naive = datetime_from_millis(millis); + *position += 8; + if (!tz_aware) { /* In the naive case, we're done here. */ + value = naive; + break; + } + + if (!naive) { + goto invalid; + } + replace = PyObject_GetAttrString(naive, "replace"); + Py_DECREF(naive); + if (!replace) { + goto invalid; + } + args = PyTuple_New(0); + if (!args) { + Py_DECREF(replace); + goto invalid; + } + kwargs = PyDict_New(); + if (!kwargs) { + Py_DECREF(replace); + Py_DECREF(args); + goto invalid; + } + utc_type = _get_object(state->UTC, "bson.tz_util", "UTC"); + if (!utc_type || PyDict_SetItemString(kwargs, "tzinfo", utc_type) == -1) { + Py_DECREF(replace); + Py_DECREF(args); + Py_DECREF(kwargs); + Py_XDECREF(utc_type); + goto invalid; + } + Py_XDECREF(utc_type); + value = PyObject_Call(replace, args, kwargs); + Py_DECREF(replace); + Py_DECREF(args); + Py_DECREF(kwargs); + break; + } + case 11: + { + PyObject* compile_func; + PyObject* pattern; + int flags; + size_t flags_length, i; + size_t pattern_length = strlen(buffer + *position); + if (pattern_length > BSON_MAX_SIZE || max < pattern_length) { + goto invalid; + } + pattern = PyUnicode_DecodeUTF8(buffer + *position, pattern_length, "strict"); + if (!pattern) { + goto invalid; + } + *position += (unsigned)pattern_length + 1; + flags_length = strlen(buffer + *position); + if (flags_length > BSON_MAX_SIZE || + (BSON_MAX_SIZE - pattern_length) < flags_length) { + Py_DECREF(pattern); + goto invalid; + } + if (max < pattern_length + flags_length) { + Py_DECREF(pattern); + goto invalid; + } + flags = 0; + for (i = 0; i < flags_length; i++) { + if (buffer[*position + i] == 'i') { + flags |= 2; + } else if 
(buffer[*position + i] == 'l') { + flags |= 4; + } else if (buffer[*position + i] == 'm') { + flags |= 8; + } else if (buffer[*position + i] == 's') { + flags |= 16; + } else if (buffer[*position + i] == 'u') { + flags |= 32; + } else if (buffer[*position + i] == 'x') { + flags |= 64; + } + } + *position += (unsigned)flags_length + 1; + + /* + * Use re.compile() if we're configured to compile regular + * expressions, else create an instance of our Regex class. + */ + if (compile_re) { + compile_func = _get_object(state->RECompile, "re", "compile"); + } else { + compile_func = _get_object(state->Regex, "bson.regex", "Regex"); + } + + if (compile_func) { + value = PyObject_CallFunction(compile_func, "Oi", pattern, flags); + Py_DECREF(compile_func); + } + Py_DECREF(pattern); + break; + } + case 12: + { + unsigned coll_length; + PyObject* collection; + PyObject* id = NULL; + PyObject* objectid_type; + PyObject* dbref_type; + + if (max < 4) { + goto invalid; + } + memcpy(&coll_length, buffer + *position, 4); + /* Encoded string length + string + 12 byte ObjectId */ + if (!coll_length || max < coll_length || max < 4 + coll_length + 12) { + goto invalid; + } + *position += 4; + /* Strings must end in \0 */ + if (buffer[*position + coll_length - 1]) { + goto invalid; + } + + collection = PyUnicode_DecodeUTF8(buffer + *position, + coll_length - 1, "strict"); + if (!collection) { + goto invalid; + } + *position += coll_length; + + if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { +#if PY_MAJOR_VERSION >= 3 + id = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); +#else + id = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); +#endif + Py_DECREF(objectid_type); + } + if (!id) { + Py_DECREF(collection); + goto invalid; + } + *position += 12; + if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) { + value = PyObject_CallFunctionObjArgs(dbref_type, collection, id, NULL); + 
Py_DECREF(dbref_type); + } + Py_DECREF(collection); + Py_DECREF(id); + break; + } + case 13: + { + PyObject* code; + PyObject* code_type; + unsigned value_length; + if (max < 4) { + goto invalid; + } + memcpy(&value_length, buffer + *position, 4); + /* Encoded string length + string */ + if (!value_length || max < value_length || max < 4 + value_length) { + goto invalid; + } + *position += 4; + /* Strings must end in \0 */ + if (buffer[*position + value_length - 1]) { + goto invalid; + } + code = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict"); + if (!code) { + goto invalid; + } + *position += value_length; + if ((code_type = _get_object(state->Code, "bson.code", "Code"))) { + value = PyObject_CallFunctionObjArgs(code_type, code, NULL, NULL); + Py_DECREF(code_type); + } + Py_DECREF(code); + break; + } + case 15: + { + unsigned c_w_s_size; + unsigned code_size; + unsigned scope_size; + PyObject* code; + PyObject* scope; + PyObject* code_type; + + if (max < 8) { + goto invalid; + } + + memcpy(&c_w_s_size, buffer + *position, 4); + *position += 4; + + if (max < c_w_s_size) { + goto invalid; + } + + memcpy(&code_size, buffer + *position, 4); + /* code_w_scope length + code length + code + scope length */ + if (!code_size || max < code_size || max < 4 + 4 + code_size + 4) { + goto invalid; + } + *position += 4; + /* Strings must end in \0 */ + if (buffer[*position + code_size - 1]) { + goto invalid; + } + code = PyUnicode_DecodeUTF8(buffer + *position, code_size - 1, "strict"); + if (!code) { + goto invalid; + } + *position += code_size; + + memcpy(&scope_size, buffer + *position, 4); + if (scope_size < BSON_MIN_SIZE) { + Py_DECREF(code); + goto invalid; + } + /* code length + code + scope length + scope */ + if ((4 + code_size + 4 + scope_size) != c_w_s_size) { + Py_DECREF(code); + goto invalid; + } + + /* Check for bad eoo */ + if (buffer[*position + scope_size - 1]) { + goto invalid; + } + scope = elements_to_dict(self, buffer + *position + 4, + 
scope_size - 5, (PyObject*)&PyDict_Type, + tz_aware, uuid_subtype, compile_re); + if (!scope) { + Py_DECREF(code); + goto invalid; + } + *position += scope_size; + + if ((code_type = _get_object(state->Code, "bson.code", "Code"))) { + value = PyObject_CallFunctionObjArgs(code_type, code, scope, NULL); + Py_DECREF(code_type); + } + Py_DECREF(code); + Py_DECREF(scope); + break; + } + case 16: + { + int i; + if (max < 4) { + goto invalid; + } + memcpy(&i, buffer + *position, 4); +#if PY_MAJOR_VERSION >= 3 + value = PyLong_FromLong(i); +#else + value = PyInt_FromLong(i); +#endif + if (!value) { + goto invalid; + } + *position += 4; + break; + } + case 17: + { + unsigned int time, inc; + PyObject* timestamp_type; + if (max < 8) { + goto invalid; + } + memcpy(&inc, buffer + *position, 4); + memcpy(&time, buffer + *position + 4, 4); + if ((timestamp_type = _get_object(state->Timestamp, "bson.timestamp", "Timestamp"))) { + value = PyObject_CallFunction(timestamp_type, "II", time, inc); + Py_DECREF(timestamp_type); + } + *position += 8; + break; + } + case 18: + { + long long ll; + if (max < 8) { + goto invalid; + } + memcpy(&ll, buffer + *position, 8); + value = PyLong_FromLongLong(ll); + if (!value) { + goto invalid; + } + *position += 8; + break; + } + case 255: + { + PyObject* minkey_type = _get_object(state->MinKey, "bson.min_key", "MinKey"); + if (!minkey_type) + goto invalid; + value = PyObject_CallFunctionObjArgs(minkey_type, NULL); + Py_DECREF(minkey_type); + break; + } + case 127: + { + PyObject* maxkey_type = _get_object(state->MaxKey, "bson.max_key", "MaxKey"); + if (!maxkey_type) + goto invalid; + value = PyObject_CallFunctionObjArgs(maxkey_type, NULL); + Py_DECREF(maxkey_type); + break; + } + default: + { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + PyErr_SetString(InvalidDocument, + "no c decoder for this type yet"); + Py_DECREF(InvalidDocument); + } + goto invalid; + } + } + + if (value) { + return value; + } + + 
invalid: + + /* + * Wrap any non-InvalidBSON errors in InvalidBSON. + */ + if (PyErr_Occurred()) { + PyObject *etype, *evalue, *etrace; + PyObject *InvalidBSON; + + /* + * Calling _error clears the error state, so fetch it first. + */ + PyErr_Fetch(&etype, &evalue, &etrace); + InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + if (!PyErr_GivenExceptionMatches(etype, InvalidBSON)) { + /* + * Raise InvalidBSON(str(e)). + */ + Py_DECREF(etype); + etype = InvalidBSON; + + if (evalue) { + PyObject *msg = PyObject_Str(evalue); + Py_DECREF(evalue); + evalue = msg; + } + PyErr_NormalizeException(&etype, &evalue, &etrace); + } else { + /* + * The current exception matches InvalidBSON, so we don't need + * this reference after all. + */ + Py_DECREF(InvalidBSON); + } + } + /* Steals references to args. */ + PyErr_Restore(etype, evalue, etrace); + } else { + PyObject *InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "invalid length or type code"); + Py_DECREF(InvalidBSON); + } + } + return NULL; +} + +static PyObject* _elements_to_dict(PyObject* self, const char* string, + unsigned max, PyObject* as_class, + unsigned char tz_aware, + unsigned char uuid_subtype, + unsigned char compile_re) { + unsigned position = 0; + PyObject* dict = PyObject_CallObject(as_class, NULL); + if (!dict) { + return NULL; + } + while (position < max) { + PyObject* name; + PyObject* value; + + unsigned char type = (unsigned char)string[position++]; + size_t name_length = strlen(string + position); + if (name_length > BSON_MAX_SIZE || position + name_length >= max) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetNone(InvalidBSON); + Py_DECREF(InvalidBSON); + } + Py_DECREF(dict); + return NULL; + } + name = PyUnicode_DecodeUTF8(string + position, name_length, "strict"); + if (!name) { + Py_DECREF(dict); + return NULL; + } + position += (unsigned)name_length + 1; + value = get_value(self, string, &position, type, + max - 
position, as_class, tz_aware, uuid_subtype, + compile_re); + if (!value) { + Py_DECREF(name); + Py_DECREF(dict); + return NULL; + } + + PyObject_SetItem(dict, name, value); + Py_DECREF(name); + Py_DECREF(value); + } + return dict; +} + +static PyObject* elements_to_dict(PyObject* self, const char* string, + unsigned max, PyObject* as_class, + unsigned char tz_aware, + unsigned char uuid_subtype, + unsigned char compile_re) { + PyObject* result; + if (Py_EnterRecursiveCall(" while decoding a BSON document")) + return NULL; + result = _elements_to_dict(self, string, max, + as_class, tz_aware, uuid_subtype, compile_re); + Py_LeaveRecursiveCall(); + return result; +} + +static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { + int size; + Py_ssize_t total_size; + const char* string; + PyObject* bson; + PyObject* as_class; + unsigned char tz_aware; + unsigned char uuid_subtype; + unsigned char compile_re; + + PyObject* dict; + PyObject* remainder; + PyObject* result; + + if (!PyArg_ParseTuple( + args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { + return NULL; + } + +#if PY_MAJOR_VERSION >= 3 + if (!PyBytes_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a bytes object"); +#else + if (!PyString_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string"); +#endif + return NULL; + } +#if PY_MAJOR_VERSION >= 3 + total_size = PyBytes_Size(bson); +#else + total_size = PyString_Size(bson); +#endif + if (total_size < BSON_MIN_SIZE) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, + "not enough data for a BSON document"); + Py_DECREF(InvalidBSON); + } + return NULL; + } + +#if PY_MAJOR_VERSION >= 3 + string = PyBytes_AsString(bson); +#else + string = PyString_AsString(bson); +#endif + if (!string) { + return NULL; + } + + memcpy(&size, string, 4); + if (size < BSON_MIN_SIZE) { + PyObject* InvalidBSON = 
_error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "invalid message size"); + Py_DECREF(InvalidBSON); + } + return NULL; + } + + if (total_size < size || total_size > BSON_MAX_SIZE) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "objsize too large"); + Py_DECREF(InvalidBSON); + } + return NULL; + } + + if (size != total_size || string[size - 1]) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "bad eoo"); + Py_DECREF(InvalidBSON); + } + return NULL; + } + + dict = elements_to_dict(self, string + 4, (unsigned)size - 5, + as_class, tz_aware, uuid_subtype, compile_re); + if (!dict) { + return NULL; + } +#if PY_MAJOR_VERSION >= 3 + remainder = PyBytes_FromStringAndSize(string + size, total_size - size); +#else + remainder = PyString_FromStringAndSize(string + size, total_size - size); +#endif + if (!remainder) { + Py_DECREF(dict); + return NULL; + } + result = Py_BuildValue("OO", dict, remainder); + Py_DECREF(dict); + Py_DECREF(remainder); + return result; +} + +static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) { + int size; + Py_ssize_t total_size; + const char* string; + PyObject* bson; + PyObject* dict; + PyObject* result; + PyObject* as_class = (PyObject*)&PyDict_Type; + unsigned char tz_aware = 1; + unsigned char uuid_subtype = 3; + unsigned char compile_re = 1; + + if (!PyArg_ParseTuple( + args, "O|Obbb", + &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { + return NULL; + } + +#if PY_MAJOR_VERSION >= 3 + if (!PyBytes_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a bytes object"); +#else + if (!PyString_Check(bson)) { + PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a string"); +#endif + return NULL; + } +#if PY_MAJOR_VERSION >= 3 + total_size = PyBytes_Size(bson); + string = PyBytes_AsString(bson); +#else + total_size = PyString_Size(bson); + string = 
PyString_AsString(bson); +#endif + if (!string) { + return NULL; + } + + if (!(result = PyList_New(0))) + return NULL; + + while (total_size > 0) { + if (total_size < BSON_MIN_SIZE) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, + "not enough data for a BSON document"); + Py_DECREF(InvalidBSON); + } + Py_DECREF(result); + return NULL; + } + + memcpy(&size, string, 4); + if (size < BSON_MIN_SIZE) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "invalid message size"); + Py_DECREF(InvalidBSON); + } + Py_DECREF(result); + return NULL; + } + + if (total_size < size) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "objsize too large"); + Py_DECREF(InvalidBSON); + } + Py_DECREF(result); + return NULL; + } + + if (string[size - 1]) { + PyObject* InvalidBSON = _error("InvalidBSON"); + if (InvalidBSON) { + PyErr_SetString(InvalidBSON, "bad eoo"); + Py_DECREF(InvalidBSON); + } + Py_DECREF(result); + return NULL; + } + + dict = elements_to_dict(self, string + 4, (unsigned)size - 5, + as_class, tz_aware, uuid_subtype, compile_re); + if (!dict) { + Py_DECREF(result); + return NULL; + } + PyList_Append(result, dict); + Py_DECREF(dict); + string += size; + total_size -= size; + } + + return result; +} + +static PyMethodDef _CBSONMethods[] = { + {"_dict_to_bson", _cbson_dict_to_bson, METH_VARARGS, + "convert a dictionary to a string containing its BSON representation."}, + {"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS, + "convert a BSON string to a SON object."}, + {"decode_all", _cbson_decode_all, METH_VARARGS, + "convert binary data to a sequence of documents."}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +#define INITERROR return NULL +static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) { + Py_VISIT(GETSTATE(m)->Binary); + Py_VISIT(GETSTATE(m)->Code); + 
Py_VISIT(GETSTATE(m)->ObjectId); + Py_VISIT(GETSTATE(m)->DBRef); + Py_VISIT(GETSTATE(m)->RECompile); + Py_VISIT(GETSTATE(m)->Regex); + Py_VISIT(GETSTATE(m)->UUID); + Py_VISIT(GETSTATE(m)->Timestamp); + Py_VISIT(GETSTATE(m)->MinKey); + Py_VISIT(GETSTATE(m)->MaxKey); + Py_VISIT(GETSTATE(m)->UTC); + Py_VISIT(GETSTATE(m)->REType); + return 0; +} + +static int _cbson_clear(PyObject *m) { + Py_CLEAR(GETSTATE(m)->Binary); + Py_CLEAR(GETSTATE(m)->Code); + Py_CLEAR(GETSTATE(m)->ObjectId); + Py_CLEAR(GETSTATE(m)->DBRef); + Py_CLEAR(GETSTATE(m)->RECompile); + Py_CLEAR(GETSTATE(m)->Regex); + Py_CLEAR(GETSTATE(m)->UUID); + Py_CLEAR(GETSTATE(m)->Timestamp); + Py_CLEAR(GETSTATE(m)->MinKey); + Py_CLEAR(GETSTATE(m)->MaxKey); + Py_CLEAR(GETSTATE(m)->UTC); + Py_CLEAR(GETSTATE(m)->REType); + return 0; +} + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_cbson", + NULL, + sizeof(struct module_state), + _CBSONMethods, + NULL, + _cbson_traverse, + _cbson_clear, + NULL +}; + +PyMODINIT_FUNC +PyInit__cbson(void) +#else +#define INITERROR return +PyMODINIT_FUNC +init_cbson(void) +#endif +{ + PyObject *m; + PyObject *c_api_object; + static void *_cbson_API[_cbson_API_POINTER_COUNT]; + + PyDateTime_IMPORT; + if (PyDateTimeAPI == NULL) { + INITERROR; + } + + /* Export C API */ + _cbson_API[_cbson_buffer_write_bytes_INDEX] = (void *) buffer_write_bytes; + _cbson_API[_cbson_write_dict_INDEX] = (void *) write_dict; + _cbson_API[_cbson_write_pair_INDEX] = (void *) write_pair; + _cbson_API[_cbson_decode_and_write_pair_INDEX] = (void *) decode_and_write_pair; + +#if PY_VERSION_HEX >= 0x03010000 + /* PyCapsule is new in python 3.1 */ + c_api_object = PyCapsule_New((void *) _cbson_API, "_cbson._C_API", NULL); +#else + c_api_object = PyCObject_FromVoidPtr((void *) _cbson_API, NULL); +#endif + if (c_api_object == NULL) + INITERROR; + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_cbson", _CBSONMethods); +#endif + if (m == NULL) { + 
Py_DECREF(c_api_object); + INITERROR; + } + + /* Import several python objects */ + if (_load_python_objects(m)) { + Py_DECREF(c_api_object); +#if PY_MAJOR_VERSION >= 3 + Py_DECREF(m); +#endif + INITERROR; + } + + if (PyModule_AddObject(m, "_C_API", c_api_object) < 0) { + Py_DECREF(c_api_object); +#if PY_MAJOR_VERSION >= 3 + Py_DECREF(m); +#endif + INITERROR; + } + +#if PY_MAJOR_VERSION >= 3 + return m; +#endif +} diff --git a/asyncio_mongo/_bson/_cbsonmodule.h b/asyncio_mongo/_bson/_cbsonmodule.h new file mode 100644 index 0000000..c4db6d0 --- /dev/null +++ b/asyncio_mongo/_bson/_cbsonmodule.h @@ -0,0 +1,103 @@ +/* + * Copyright 2012 10gen, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _CBSONMODULE_H +#define _CBSONMODULE_H + +/* Py_ssize_t was new in python 2.5. See conversion + * guidlines in http://www.python.org/dev/peps/pep-0353 + * */ +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + +#if defined(WIN32) || defined(_MSC_VER) +/* + * This macro is basically an implementation of asprintf for win32 + * We print to the provided buffer to get the string value as an int. 
+ */ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +#define INT2STRING(buffer, i) \ + _snprintf_s((buffer), \ + _scprintf("%d", (i)) + 1, \ + _scprintf("%d", (i)) + 1, \ + "%d", \ + (i)) +#define STRCAT(dest, n, src) strcat_s((dest), (n), (src)) +#else +#define INT2STRING(buffer, i) \ + _snprintf((buffer), \ + _scprintf("%d", (i)) + 1, \ + "%d", \ + (i)) +#define STRCAT(dest, n, src) strcat((dest), (src)) +#endif +#else +#define INT2STRING(buffer, i) snprintf((buffer), sizeof((buffer)), "%d", (i)) +#define STRCAT(dest, n, src) strcat((dest), (src)) +#endif + +/* C API functions */ +#define _cbson_buffer_write_bytes_INDEX 0 +#define _cbson_buffer_write_bytes_RETURN int +#define _cbson_buffer_write_bytes_PROTO (buffer_t buffer, const char* data, int size) + +#define _cbson_write_dict_INDEX 1 +#define _cbson_write_dict_RETURN int +#define _cbson_write_dict_PROTO (PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) + +#define _cbson_write_pair_INDEX 2 +#define _cbson_write_pair_RETURN int +#define _cbson_write_pair_PROTO (PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char allow_id) + +#define _cbson_decode_and_write_pair_INDEX 3 +#define _cbson_decode_and_write_pair_RETURN int +#define _cbson_decode_and_write_pair_PROTO (PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) + +/* Total number of C API pointers */ +#define _cbson_API_POINTER_COUNT 4 + +#ifdef _CBSON_MODULE +/* This section is used when compiling _cbsonmodule */ + +static _cbson_buffer_write_bytes_RETURN buffer_write_bytes _cbson_buffer_write_bytes_PROTO; + +static _cbson_write_dict_RETURN write_dict _cbson_write_dict_PROTO; + +static _cbson_write_pair_RETURN write_pair _cbson_write_pair_PROTO; + +static _cbson_decode_and_write_pair_RETURN 
decode_and_write_pair _cbson_decode_and_write_pair_PROTO;
+
+#else
+/* This section is used in modules that use _cbsonmodule's API */
+
+static void **_cbson_API;
+
+#define buffer_write_bytes (*(_cbson_buffer_write_bytes_RETURN (*)_cbson_buffer_write_bytes_PROTO) _cbson_API[_cbson_buffer_write_bytes_INDEX])
+
+#define write_dict (*(_cbson_write_dict_RETURN (*)_cbson_write_dict_PROTO) _cbson_API[_cbson_write_dict_INDEX])
+
+#define write_pair (*(_cbson_write_pair_RETURN (*)_cbson_write_pair_PROTO) _cbson_API[_cbson_write_pair_INDEX])
+
+#define decode_and_write_pair (*(_cbson_decode_and_write_pair_RETURN (*)_cbson_decode_and_write_pair_PROTO) _cbson_API[_cbson_decode_and_write_pair_INDEX])
+
+#define _cbson_IMPORT _cbson_API = (void **)PyCapsule_Import("_cbson._C_API", 0)
+
+#endif
+
+#endif // _CBSONMODULE_H
diff --git a/asyncio_mongo/_bson/buffer.c b/asyncio_mongo/_bson/buffer.c
new file mode 100644
index 0000000..19d528c
--- /dev/null
+++ b/asyncio_mongo/_bson/buffer.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2009-2012 10gen, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "buffer.h"
+
+#define INITIAL_BUFFER_SIZE 256
+
+struct buffer {
+    char* buffer;
+    int size;
+    int position;
+};
+
+/* Allocate and return a new buffer.
+ * Return NULL on allocation failure. 
*/ +buffer_t buffer_new(void) { + buffer_t buffer; + buffer = (buffer_t)malloc(sizeof(struct buffer)); + if (buffer == NULL) { + return NULL; + } + + buffer->size = INITIAL_BUFFER_SIZE; + buffer->position = 0; + buffer->buffer = (char*)malloc(sizeof(char) * INITIAL_BUFFER_SIZE); + if (buffer->buffer == NULL) { + free(buffer); + return NULL; + } + + return buffer; +} + +/* Free the memory allocated for `buffer`. + * Return non-zero on failure. */ +int buffer_free(buffer_t buffer) { + if (buffer == NULL) { + return 1; + } + free(buffer->buffer); + free(buffer); + return 0; +} + +/* Grow `buffer` to at least `min_length`. + * Return non-zero on allocation failure. */ +static int buffer_grow(buffer_t buffer, int min_length) { + int old_size = 0; + int size = buffer->size; + char* old_buffer = buffer->buffer; + if (size >= min_length) { + return 0; + } + while (size < min_length) { + old_size = size; + size *= 2; + if (size <= old_size) { + /* Size did not increase. Could be an overflow + * or size < 1. Just go with min_length. */ + size = min_length; + } + } + buffer->buffer = (char*)realloc(buffer->buffer, sizeof(char) * size); + if (buffer->buffer == NULL) { + free(old_buffer); + free(buffer); + return 1; + } + buffer->size = size; + return 0; +} + +/* Assure that `buffer` has at least `size` free bytes (and grow if needed). + * Return non-zero on allocation failure. */ +static int buffer_assure_space(buffer_t buffer, int size) { + if (buffer->position + size <= buffer->size) { + return 0; + } + return buffer_grow(buffer, buffer->position + size); +} + +/* Save `size` bytes from the current position in `buffer` (and grow if needed). + * Return offset for writing, or -1 on allocation failure. 
*/ +buffer_position buffer_save_space(buffer_t buffer, int size) { + int position = buffer->position; + if (buffer_assure_space(buffer, size) != 0) { + return -1; + } + buffer->position += size; + return position; +} + +/* Write `size` bytes from `data` to `buffer` (and grow if needed). + * Return non-zero on allocation failure. */ +int buffer_write(buffer_t buffer, const char* data, int size) { + if (buffer_assure_space(buffer, size) != 0) { + return 1; + } + + memcpy(buffer->buffer + buffer->position, data, size); + buffer->position += size; + return 0; +} + +/* Write `size` bytes from `data` to `buffer` at position `position`. + * Does not change the internal position of `buffer`. + * Return non-zero if buffer isn't large enough for write. */ +int buffer_write_at_position(buffer_t buffer, buffer_position position, + const char* data, int size) { + if (position + size > buffer->size) { + buffer_free(buffer); + return 1; + } + + memcpy(buffer->buffer + position, data, size); + return 0; +} + + +int buffer_get_position(buffer_t buffer) { + return buffer->position; +} + +char* buffer_get_buffer(buffer_t buffer) { + return buffer->buffer; +} + +void buffer_update_position(buffer_t buffer, buffer_position new_position) { + buffer->position = new_position; +} diff --git a/asyncio_mongo/_bson/buffer.h b/asyncio_mongo/_bson/buffer.h new file mode 100644 index 0000000..23a46d9 --- /dev/null +++ b/asyncio_mongo/_bson/buffer.h @@ -0,0 +1,56 @@ +/* + * Copyright 2009-2012 10gen, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BUFFER_H +#define BUFFER_H + +/* Note: if any of these functions return a failure condition then the buffer + * has already been freed. */ + +/* A buffer */ +typedef struct buffer* buffer_t; +/* A position in the buffer */ +typedef int buffer_position; + +/* Allocate and return a new buffer. + * Return NULL on allocation failure. */ +buffer_t buffer_new(void); + +/* Free the memory allocated for `buffer`. + * Return non-zero on failure. */ +int buffer_free(buffer_t buffer); + +/* Save `size` bytes from the current position in `buffer` (and grow if needed). + * Return offset for writing, or -1 on allocation failure. */ +buffer_position buffer_save_space(buffer_t buffer, int size); + +/* Write `size` bytes from `data` to `buffer` (and grow if needed). + * Return non-zero on allocation failure. */ +int buffer_write(buffer_t buffer, const char* data, int size); + +/* Write `size` bytes from `data` to `buffer` at position `position`. + * Does not change the internal position of `buffer`. + * Return non-zero if buffer isn't large enough for write. */ +int buffer_write_at_position(buffer_t buffer, buffer_position position, const char* data, int size); + +/* Getters for the internals of a buffer_t. + * Should try to avoid using these as much as possible + * since they break the abstraction. */ +buffer_position buffer_get_position(buffer_t buffer); +char* buffer_get_buffer(buffer_t buffer); +void buffer_update_position(buffer_t buffer, buffer_position new_position); + +#endif diff --git a/asyncio_mongo/_bson/encoding_helpers.c b/asyncio_mongo/_bson/encoding_helpers.c new file mode 100644 index 0000000..d7f8faf --- /dev/null +++ b/asyncio_mongo/_bson/encoding_helpers.c @@ -0,0 +1,118 @@ +/* + * Copyright 2009-2012 10gen, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "encoding_helpers.h" + +/* + * Portions Copyright 2001 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. 
+ */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * The length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns 0. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. + */ +static unsigned char isLegalUTF8(const unsigned char* source, int length) { + unsigned char a; + const unsigned char* srcptr = source + length; + switch (length) { + default: return 0; + /* Everything else falls through when "true"... 
*/ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; + case 2: if ((a = (*--srcptr)) > 0xBF) return 0; + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return 0; break; + case 0xF0: if (a < 0x90) return 0; break; + case 0xF4: if (a > 0x8F) return 0; break; + default: if (a < 0x80) return 0; + } + case 1: if (*source >= 0x80 && *source < 0xC2) return 0; + if (*source > 0xF4) return 0; + } + return 1; +} + +result_t check_string(const unsigned char* string, const int length, + const char check_utf8, const char check_null) { + int position = 0; + /* By default we go character by character. Will be different for checking + * UTF-8 */ + int sequence_length = 1; + + if (!check_utf8 && !check_null) { + return VALID; + } + + while (position < length) { + if (check_null && *(string + position) == 0) { + return HAS_NULL; + } + if (check_utf8) { + sequence_length = trailingBytesForUTF8[*(string + position)] + 1; + if ((position + sequence_length) > length) { + return NOT_UTF_8; + } + if (!isLegalUTF8(string + position, sequence_length)) { + return NOT_UTF_8; + } + } + position += sequence_length; + } + + return VALID; +} diff --git a/asyncio_mongo/_bson/encoding_helpers.h b/asyncio_mongo/_bson/encoding_helpers.h new file mode 100644 index 0000000..6d0ba0f --- /dev/null +++ b/asyncio_mongo/_bson/encoding_helpers.h @@ -0,0 +1,29 @@ +/* + * Copyright 2009-2012 10gen, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENCODING_HELPERS_H +#define ENCODING_HELPERS_H + +typedef enum { + VALID, + NOT_UTF_8, + HAS_NULL +} result_t; + +result_t check_string(const unsigned char* string, const int length, + const char check_utf8, const char check_null); + +#endif diff --git a/asyncio_mongo/_bson/time64.c b/asyncio_mongo/_bson/time64.c new file mode 100644 index 0000000..5421dc9 --- /dev/null +++ b/asyncio_mongo/_bson/time64.c @@ -0,0 +1,789 @@ +/* + +Copyright (c) 2007-2010 Michael G Schwern + +This software originally derived from Paul Sheer's pivotal_gmtime_r.c. + +The MIT License: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +/* + +Programmers who have available to them 64-bit time values as a 'long +long' type can use localtime64_r() and gmtime64_r() which correctly +converts the time even on 32-bit systems. 
Whether you have 64-bit time
+values will depend on the operating system.
+
+localtime64_r() is a 64-bit equivalent of localtime_r().
+
+gmtime64_r() is a 64-bit equivalent of gmtime_r().
+
+*/
+
+#ifdef _MSC_VER
+    #define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include "time64.h"
+#include "time64_limits.h"
+
+
+/* Spec says except for stftime() and the _r() functions, these
+   all return static memory.  Stabbings! */
+static struct TM Static_Return_Date;
+
+static const int days_in_month[2][12] = {
+    {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+    {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
+};
+
+static const int julian_days_by_month[2][12] = {
+    {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334},
+    {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335},
+};
+
+static const int length_of_year[2] = { 365, 366 };
+
+/* Some numbers relating to the gregorian cycle */
+static const Year years_in_gregorian_cycle = 400;
+#define days_in_gregorian_cycle ((365 * 400) + 100 - 4 + 1)
+static const Time64_T seconds_in_gregorian_cycle = days_in_gregorian_cycle * 60LL * 60LL * 24LL;
+
+/* Year range we can trust the time funcitons with */
+#define MAX_SAFE_YEAR 2037
+#define MIN_SAFE_YEAR 1971
+
+/* 28 year Julian calendar cycle */
+#define SOLAR_CYCLE_LENGTH 28
+
+/* Year cycle from MAX_SAFE_YEAR down. 
*/ +static const int safe_years_high[SOLAR_CYCLE_LENGTH] = { + 2016, 2017, 2018, 2019, + 2020, 2021, 2022, 2023, + 2024, 2025, 2026, 2027, + 2028, 2029, 2030, 2031, + 2032, 2033, 2034, 2035, + 2036, 2037, 2010, 2011, + 2012, 2013, 2014, 2015 +}; + +/* Year cycle from MIN_SAFE_YEAR up */ +static const int safe_years_low[SOLAR_CYCLE_LENGTH] = { + 1996, 1997, 1998, 1971, + 1972, 1973, 1974, 1975, + 1976, 1977, 1978, 1979, + 1980, 1981, 1982, 1983, + 1984, 1985, 1986, 1987, + 1988, 1989, 1990, 1991, + 1992, 1993, 1994, 1995, +}; + +/* This isn't used, but it's handy to look at */ +static const int dow_year_start[SOLAR_CYCLE_LENGTH] = { + 5, 0, 1, 2, /* 0 2016 - 2019 */ + 3, 5, 6, 0, /* 4 */ + 1, 3, 4, 5, /* 8 1996 - 1998, 1971*/ + 6, 1, 2, 3, /* 12 1972 - 1975 */ + 4, 6, 0, 1, /* 16 */ + 2, 4, 5, 6, /* 20 2036, 2037, 2010, 2011 */ + 0, 2, 3, 4 /* 24 2012, 2013, 2014, 2015 */ +}; + +/* Let's assume people are going to be looking for dates in the future. + Let's provide some cheats so you can skip ahead. + This has a 4x speed boost when near 2008. +*/ +/* Number of days since epoch on Jan 1st, 2008 GMT */ +#define CHEAT_DAYS (1199145600 / 24 / 60 / 60) +#define CHEAT_YEARS 108 + +#define IS_LEAP(n) ((!(((n) + 1900) % 400) || (!(((n) + 1900) % 4) && (((n) + 1900) % 100))) != 0) +#define WRAP(a,b,m) ((a) = ((a) < 0 ) ? 
((b)--, (a) + (m)) : (a)) + +#ifdef USE_SYSTEM_LOCALTIME +# define SHOULD_USE_SYSTEM_LOCALTIME(a) ( \ + (a) <= SYSTEM_LOCALTIME_MAX && \ + (a) >= SYSTEM_LOCALTIME_MIN \ +) +#else +# define SHOULD_USE_SYSTEM_LOCALTIME(a) (0) +#endif + +#ifdef USE_SYSTEM_GMTIME +# define SHOULD_USE_SYSTEM_GMTIME(a) ( \ + (a) <= SYSTEM_GMTIME_MAX && \ + (a) >= SYSTEM_GMTIME_MIN \ +) +#else +# define SHOULD_USE_SYSTEM_GMTIME(a) (0) +#endif + +/* Multi varadic macros are a C99 thing, alas */ +#ifdef TIME_64_DEBUG +# define TIME64_TRACE(format) (fprintf(stderr, format)) +# define TIME64_TRACE1(format, var1) (fprintf(stderr, format, var1)) +# define TIME64_TRACE2(format, var1, var2) (fprintf(stderr, format, var1, var2)) +# define TIME64_TRACE3(format, var1, var2, var3) (fprintf(stderr, format, var1, var2, var3)) +#else +# define TIME64_TRACE(format) ((void)0) +# define TIME64_TRACE1(format, var1) ((void)0) +# define TIME64_TRACE2(format, var1, var2) ((void)0) +# define TIME64_TRACE3(format, var1, var2, var3) ((void)0) +#endif + + +static int is_exception_century(Year year) +{ + int is_exception = ((year % 100 == 0) && !(year % 400 == 0)); + TIME64_TRACE1("# is_exception_century: %s\n", is_exception ? "yes" : "no"); + + return(is_exception); +} + + +/* Compare two dates. + The result is like cmp. 
+ Ignores things like gmtoffset and dst +*/ +int cmp_date( const struct TM* left, const struct tm* right ) { + if( left->tm_year > right->tm_year ) + return 1; + else if( left->tm_year < right->tm_year ) + return -1; + + if( left->tm_mon > right->tm_mon ) + return 1; + else if( left->tm_mon < right->tm_mon ) + return -1; + + if( left->tm_mday > right->tm_mday ) + return 1; + else if( left->tm_mday < right->tm_mday ) + return -1; + + if( left->tm_hour > right->tm_hour ) + return 1; + else if( left->tm_hour < right->tm_hour ) + return -1; + + if( left->tm_min > right->tm_min ) + return 1; + else if( left->tm_min < right->tm_min ) + return -1; + + if( left->tm_sec > right->tm_sec ) + return 1; + else if( left->tm_sec < right->tm_sec ) + return -1; + + return 0; +} + + +/* Check if a date is safely inside a range. + The intention is to check if its a few days inside. +*/ +int date_in_safe_range( const struct TM* date, const struct tm* min, const struct tm* max ) { + if( cmp_date(date, min) == -1 ) + return 0; + + if( cmp_date(date, max) == 1 ) + return 0; + + return 1; +} + + +/* timegm() is not in the C or POSIX spec, but it is such a useful + extension I would be remiss in leaving it out. 
Also I need it + for localtime64() +*/ +Time64_T timegm64(const struct TM *date) { + Time64_T days = 0; + Time64_T seconds = 0; + Year year; + Year orig_year = (Year)date->tm_year; + int cycles = 0; + + if( orig_year > 100 ) { + cycles = (int)((orig_year - 100) / 400); + orig_year -= cycles * 400; + days += (Time64_T)cycles * days_in_gregorian_cycle; + } + else if( orig_year < -300 ) { + cycles = (int)((orig_year - 100) / 400); + orig_year -= cycles * 400; + days += (Time64_T)cycles * days_in_gregorian_cycle; + } + TIME64_TRACE3("# timegm/ cycles: %d, days: %lld, orig_year: %lld\n", cycles, days, orig_year); + + if( orig_year > 70 ) { + year = 70; + while( year < orig_year ) { + days += length_of_year[IS_LEAP(year)]; + year++; + } + } + else if ( orig_year < 70 ) { + year = 69; + do { + days -= length_of_year[IS_LEAP(year)]; + year--; + } while( year >= orig_year ); + } + + days += julian_days_by_month[IS_LEAP(orig_year)][date->tm_mon]; + days += date->tm_mday - 1; + + seconds = days * 60 * 60 * 24; + + seconds += date->tm_hour * 60 * 60; + seconds += date->tm_min * 60; + seconds += date->tm_sec; + + return(seconds); +} + + +#ifndef NDEBUG +static int check_tm(struct TM *tm) +{ + /* Don't forget leap seconds */ + assert(tm->tm_sec >= 0); + assert(tm->tm_sec <= 61); + + assert(tm->tm_min >= 0); + assert(tm->tm_min <= 59); + + assert(tm->tm_hour >= 0); + assert(tm->tm_hour <= 23); + + assert(tm->tm_mday >= 1); + assert(tm->tm_mday <= days_in_month[IS_LEAP(tm->tm_year)][tm->tm_mon]); + + assert(tm->tm_mon >= 0); + assert(tm->tm_mon <= 11); + + assert(tm->tm_wday >= 0); + assert(tm->tm_wday <= 6); + + assert(tm->tm_yday >= 0); + assert(tm->tm_yday <= length_of_year[IS_LEAP(tm->tm_year)]); + +#ifdef HAS_TM_TM_GMTOFF + assert(tm->tm_gmtoff >= -24 * 60 * 60); + assert(tm->tm_gmtoff <= 24 * 60 * 60); +#endif + + return 1; +} +#endif + + +/* The exceptional centuries without leap years cause the cycle to + shift by 16 +*/ +static Year cycle_offset(Year year) +{ + const Year 
start_year = 2000; + Year year_diff = year - start_year; + Year exceptions; + + if( year > start_year ) + year_diff--; + + exceptions = year_diff / 100; + exceptions -= year_diff / 400; + + TIME64_TRACE3("# year: %lld, exceptions: %lld, year_diff: %lld\n", + year, exceptions, year_diff); + + return exceptions * 16; +} + +/* For a given year after 2038, pick the latest possible matching + year in the 28 year calendar cycle. + + A matching year... + 1) Starts on the same day of the week. + 2) Has the same leap year status. + + This is so the calendars match up. + + Also the previous year must match. When doing Jan 1st you might + wind up on Dec 31st the previous year when doing a -UTC time zone. + + Finally, the next year must have the same start day of week. This + is for Dec 31st with a +UTC time zone. + It doesn't need the same leap year status since we only care about + January 1st. +*/ +static int safe_year(const Year year) +{ + int safe_year = 0; + Year year_cycle; + + if( year >= MIN_SAFE_YEAR && year <= MAX_SAFE_YEAR ) { + return (int)year; + } + + year_cycle = year + cycle_offset(year); + + /* safe_years_low is off from safe_years_high by 8 years */ + if( year < MIN_SAFE_YEAR ) + year_cycle -= 8; + + /* Change non-leap xx00 years to an equivalent */ + if( is_exception_century(year) ) + year_cycle += 11; + + /* Also xx01 years, since the previous year will be wrong */ + if( is_exception_century(year - 1) ) + year_cycle += 17; + + year_cycle %= SOLAR_CYCLE_LENGTH; + if( year_cycle < 0 ) + year_cycle = SOLAR_CYCLE_LENGTH + year_cycle; + + assert( year_cycle >= 0 ); + assert( year_cycle < SOLAR_CYCLE_LENGTH ); + if( year < MIN_SAFE_YEAR ) + safe_year = safe_years_low[year_cycle]; + else if( year > MAX_SAFE_YEAR ) + safe_year = safe_years_high[year_cycle]; + else + assert(0); + + TIME64_TRACE3("# year: %lld, year_cycle: %lld, safe_year: %d\n", + year, year_cycle, safe_year); + + assert(safe_year <= MAX_SAFE_YEAR && safe_year >= MIN_SAFE_YEAR); + + return 
safe_year; +} + + +void copy_tm_to_TM64(const struct tm *src, struct TM *dest) { + if( src == NULL ) { + memset(dest, 0, sizeof(*dest)); + } + else { +# ifdef USE_TM64 + dest->tm_sec = src->tm_sec; + dest->tm_min = src->tm_min; + dest->tm_hour = src->tm_hour; + dest->tm_mday = src->tm_mday; + dest->tm_mon = src->tm_mon; + dest->tm_year = (Year)src->tm_year; + dest->tm_wday = src->tm_wday; + dest->tm_yday = src->tm_yday; + dest->tm_isdst = src->tm_isdst; + +# ifdef HAS_TM_TM_GMTOFF + dest->tm_gmtoff = src->tm_gmtoff; +# endif + +# ifdef HAS_TM_TM_ZONE + dest->tm_zone = src->tm_zone; +# endif + +# else + /* They're the same type */ + memcpy(dest, src, sizeof(*dest)); +# endif + } +} + + +void copy_TM64_to_tm(const struct TM *src, struct tm *dest) { + if( src == NULL ) { + memset(dest, 0, sizeof(*dest)); + } + else { +# ifdef USE_TM64 + dest->tm_sec = src->tm_sec; + dest->tm_min = src->tm_min; + dest->tm_hour = src->tm_hour; + dest->tm_mday = src->tm_mday; + dest->tm_mon = src->tm_mon; + dest->tm_year = (int)src->tm_year; + dest->tm_wday = src->tm_wday; + dest->tm_yday = src->tm_yday; + dest->tm_isdst = src->tm_isdst; + +# ifdef HAS_TM_TM_GMTOFF + dest->tm_gmtoff = src->tm_gmtoff; +# endif + +# ifdef HAS_TM_TM_ZONE + dest->tm_zone = src->tm_zone; +# endif + +# else + /* They're the same type */ + memcpy(dest, src, sizeof(*dest)); +# endif + } +} + + +/* Simulate localtime_r() to the best of our ability */ +struct tm * fake_localtime_r(const time_t *time, struct tm *result) { + const struct tm *static_result = localtime(time); + + assert(result != NULL); + + if( static_result == NULL ) { + memset(result, 0, sizeof(*result)); + return NULL; + } + else { + memcpy(result, static_result, sizeof(*result)); + return result; + } +} + + +/* Simulate gmtime_r() to the best of our ability */ +struct tm * fake_gmtime_r(const time_t *time, struct tm *result) { + const struct tm *static_result = gmtime(time); + + assert(result != NULL); + + if( static_result == NULL ) { + 
memset(result, 0, sizeof(*result)); + return NULL; + } + else { + memcpy(result, static_result, sizeof(*result)); + return result; + } +} + + +static Time64_T seconds_between_years(Year left_year, Year right_year) { + int increment = (left_year > right_year) ? 1 : -1; + Time64_T seconds = 0; + int cycles; + + if( left_year > 2400 ) { + cycles = (int)((left_year - 2400) / 400); + left_year -= cycles * 400; + seconds += cycles * seconds_in_gregorian_cycle; + } + else if( left_year < 1600 ) { + cycles = (int)((left_year - 1600) / 400); + left_year += cycles * 400; + seconds += cycles * seconds_in_gregorian_cycle; + } + + while( left_year != right_year ) { + seconds += length_of_year[IS_LEAP(right_year - 1900)] * 60 * 60 * 24; + right_year += increment; + } + + return seconds * increment; +} + + +Time64_T mktime64(const struct TM *input_date) { + struct tm safe_date; + struct TM date; + Time64_T time; + Year year = input_date->tm_year + 1900; + + if( date_in_safe_range(input_date, &SYSTEM_MKTIME_MIN, &SYSTEM_MKTIME_MAX) ) + { + copy_TM64_to_tm(input_date, &safe_date); + return (Time64_T)mktime(&safe_date); + } + + /* Have to make the year safe in date else it won't fit in safe_date */ + date = *input_date; + date.tm_year = safe_year(year) - 1900; + copy_TM64_to_tm(&date, &safe_date); + + time = (Time64_T)mktime(&safe_date); + + time += seconds_between_years(year, (Year)(safe_date.tm_year + 1900)); + + return time; +} + + +/* Because I think mktime() is a crappy name */ +Time64_T timelocal64(const struct TM *date) { + return mktime64(date); +} + + +struct TM *gmtime64_r (const Time64_T *in_time, struct TM *p) +{ + int v_tm_sec, v_tm_min, v_tm_hour, v_tm_mon, v_tm_wday; + Time64_T v_tm_tday; + int leap; + Time64_T m; + Time64_T time = *in_time; + Year year = 70; + int cycles = 0; + + assert(p != NULL); + + /* Use the system gmtime() if time_t is small enough */ + if( SHOULD_USE_SYSTEM_GMTIME(*in_time) ) { + time_t safe_time = (time_t)*in_time; + struct tm safe_date; + 
GMTIME_R(&safe_time, &safe_date); + + copy_tm_to_TM64(&safe_date, p); + assert(check_tm(p)); + + return p; + } + +#ifdef HAS_TM_TM_GMTOFF + p->tm_gmtoff = 0; +#endif + p->tm_isdst = 0; + +#ifdef HAS_TM_TM_ZONE + p->tm_zone = "UTC"; +#endif + + v_tm_sec = (int)(time % 60); + time /= 60; + v_tm_min = (int)(time % 60); + time /= 60; + v_tm_hour = (int)(time % 24); + time /= 24; + v_tm_tday = time; + + WRAP (v_tm_sec, v_tm_min, 60); + WRAP (v_tm_min, v_tm_hour, 60); + WRAP (v_tm_hour, v_tm_tday, 24); + + v_tm_wday = (int)((v_tm_tday + 4) % 7); + if (v_tm_wday < 0) + v_tm_wday += 7; + m = v_tm_tday; + + if (m >= CHEAT_DAYS) { + year = CHEAT_YEARS; + m -= CHEAT_DAYS; + } + + if (m >= 0) { + /* Gregorian cycles, this is huge optimization for distant times */ + cycles = (int)(m / (Time64_T) days_in_gregorian_cycle); + if( cycles ) { + m -= (cycles * (Time64_T) days_in_gregorian_cycle); + year += (cycles * years_in_gregorian_cycle); + } + + /* Years */ + leap = IS_LEAP (year); + while (m >= (Time64_T) length_of_year[leap]) { + m -= (Time64_T) length_of_year[leap]; + year++; + leap = IS_LEAP (year); + } + + /* Months */ + v_tm_mon = 0; + while (m >= (Time64_T) days_in_month[leap][v_tm_mon]) { + m -= (Time64_T) days_in_month[leap][v_tm_mon]; + v_tm_mon++; + } + } else { + year--; + + /* Gregorian cycles */ + cycles = (int)((m / (Time64_T) days_in_gregorian_cycle) + 1); + if( cycles ) { + m -= (cycles * (Time64_T) days_in_gregorian_cycle); + year += (cycles * years_in_gregorian_cycle); + } + + /* Years */ + leap = IS_LEAP (year); + while (m < (Time64_T) -length_of_year[leap]) { + m += (Time64_T) length_of_year[leap]; + year--; + leap = IS_LEAP (year); + } + + /* Months */ + v_tm_mon = 11; + while (m < (Time64_T) -days_in_month[leap][v_tm_mon]) { + m += (Time64_T) days_in_month[leap][v_tm_mon]; + v_tm_mon--; + } + m += (Time64_T) days_in_month[leap][v_tm_mon]; + } + + p->tm_year = (int)year; + if( p->tm_year != year ) { +#ifdef EOVERFLOW + errno = EOVERFLOW; +#endif + return 
NULL; + } + + /* At this point m is less than a year so casting to an int is safe */ + p->tm_mday = (int) m + 1; + p->tm_yday = julian_days_by_month[leap][v_tm_mon] + (int)m; + p->tm_sec = v_tm_sec; + p->tm_min = v_tm_min; + p->tm_hour = v_tm_hour; + p->tm_mon = v_tm_mon; + p->tm_wday = v_tm_wday; + + assert(check_tm(p)); + + return p; +} + + +struct TM *localtime64_r (const Time64_T *time, struct TM *local_tm) +{ + time_t safe_time; + struct tm safe_date; + struct TM gm_tm; + Year orig_year; + int month_diff; + + assert(local_tm != NULL); + + /* Use the system localtime() if time_t is small enough */ + if( SHOULD_USE_SYSTEM_LOCALTIME(*time) ) { + safe_time = (time_t)*time; + + TIME64_TRACE1("Using system localtime for %lld\n", *time); + + LOCALTIME_R(&safe_time, &safe_date); + + copy_tm_to_TM64(&safe_date, local_tm); + assert(check_tm(local_tm)); + + return local_tm; + } + + if( gmtime64_r(time, &gm_tm) == NULL ) { + TIME64_TRACE1("gmtime64_r returned null for %lld\n", *time); + return NULL; + } + + orig_year = gm_tm.tm_year; + + if (gm_tm.tm_year > (2037 - 1900) || + gm_tm.tm_year < (1970 - 1900) + ) + { + TIME64_TRACE1("Mapping tm_year %lld to safe_year\n", (Year)gm_tm.tm_year); + gm_tm.tm_year = safe_year((Year)(gm_tm.tm_year + 1900)) - 1900; + } + + safe_time = (time_t)timegm64(&gm_tm); + if( LOCALTIME_R(&safe_time, &safe_date) == NULL ) { + TIME64_TRACE1("localtime_r(%d) returned NULL\n", (int)safe_time); + return NULL; + } + + copy_tm_to_TM64(&safe_date, local_tm); + + local_tm->tm_year = (int)orig_year; + if( local_tm->tm_year != orig_year ) { + TIME64_TRACE2("tm_year overflow: tm_year %lld, orig_year %lld\n", + (Year)local_tm->tm_year, (Year)orig_year); + +#ifdef EOVERFLOW + errno = EOVERFLOW; +#endif + return NULL; + } + + + month_diff = local_tm->tm_mon - gm_tm.tm_mon; + + /* When localtime is Dec 31st previous year and + gmtime is Jan 1st next year. 
+    */
+    if( month_diff == 11 ) {
+        local_tm->tm_year--;
+    }
+
+    /* When localtime is Jan 1st, next year and
+       gmtime is Dec 31st, previous year.
+    */
+    if( month_diff == -11 ) {
+        local_tm->tm_year++;
+    }
+
+    /* GMT is Jan 1st, xx01 year, but localtime is still Dec 31st
+       in a non-leap xx00.  There is one point in the cycle
+       we can't account for which the safe xx00 year is a leap
+       year.  So we need to correct for Dec 31st comming out as
+       the 366th day of the year.
+    */
+    if( !IS_LEAP(local_tm->tm_year) && local_tm->tm_yday == 365 )
+        local_tm->tm_yday--;
+
+    assert(check_tm(local_tm));
+
+    return local_tm;
+}
+
+
+int valid_tm_wday( const struct TM* date ) {
+    if( 0 <= date->tm_wday && date->tm_wday <= 6 )
+        return 1;
+    else
+        return 0;
+}
+
+int valid_tm_mon( const struct TM* date ) {
+    if( 0 <= date->tm_mon && date->tm_mon <= 11 )
+        return 1;
+    else
+        return 0;
+}
+
+
+/* Non-thread safe versions of the above */
+struct TM *localtime64(const Time64_T *time) {
+#ifdef _MSC_VER
+    _tzset();
+#else
+    tzset();
+#endif
+    return localtime64_r(time, &Static_Return_Date);
+}
+
+struct TM *gmtime64(const Time64_T *time) {
+    return gmtime64_r(time, &Static_Return_Date);
+}
diff --git a/asyncio_mongo/_bson/time64.h b/asyncio_mongo/_bson/time64.h
new file mode 100644
index 0000000..df9be9b
--- /dev/null
+++ b/asyncio_mongo/_bson/time64.h
@@ -0,0 +1,67 @@
+#ifndef TIME64_H
+# define TIME64_H
+
+#include <time.h>
+#include "time64_config.h"
+
+/* Set our custom types */
+typedef INT_64_T Int64;
+typedef Int64 Time64_T;
+typedef Int64 Year;
+
+
+/* A copy of the tm struct but with a 64 bit year */
+struct TM64 {
+    int tm_sec;
+    int tm_min;
+    int tm_hour;
+    int tm_mday;
+    int tm_mon;
+    Year tm_year;
+    int tm_wday;
+    int tm_yday;
+    int tm_isdst;
+
+#ifdef HAS_TM_TM_GMTOFF
+    long tm_gmtoff;
+#endif
+
+#ifdef HAS_TM_TM_ZONE
+    char *tm_zone;
+#endif
+};
+
+
+/* Decide which tm struct to use */
+#ifdef USE_TM64
+#define TM TM64
+#else
+#define TM tm
+#endif
+
+
+/* Declare public functions */ 
+struct TM *gmtime64_r (const Time64_T *, struct TM *); +struct TM *localtime64_r (const Time64_T *, struct TM *); +struct TM *gmtime64 (const Time64_T *); +struct TM *localtime64 (const Time64_T *); + +Time64_T timegm64 (const struct TM *); +Time64_T mktime64 (const struct TM *); +Time64_T timelocal64 (const struct TM *); + + +/* Not everyone has gm/localtime_r(), provide a replacement */ +#ifdef HAS_LOCALTIME_R +# define LOCALTIME_R(clock, result) localtime_r(clock, result) +#else +# define LOCALTIME_R(clock, result) fake_localtime_r(clock, result) +#endif +#ifdef HAS_GMTIME_R +# define GMTIME_R(clock, result) gmtime_r(clock, result) +#else +# define GMTIME_R(clock, result) fake_gmtime_r(clock, result) +#endif + + +#endif diff --git a/asyncio_mongo/_bson/time64_config.h b/asyncio_mongo/_bson/time64_config.h new file mode 100644 index 0000000..9d4c111 --- /dev/null +++ b/asyncio_mongo/_bson/time64_config.h @@ -0,0 +1,78 @@ +/* Configuration + ------------- + Define as appropriate for your system. + Sensible defaults provided. +*/ + + +#ifndef TIME64_CONFIG_H +# define TIME64_CONFIG_H + +/* Debugging + TIME_64_DEBUG + Define if you want debugging messages +*/ +/* #define TIME_64_DEBUG */ + + +/* INT_64_T + A 64 bit integer type to use to store time and others. + Must be defined. +*/ +#define INT_64_T long long + + +/* USE_TM64 + Should we use a 64 bit safe replacement for tm? This will + let you go past year 2 billion but the struct will be incompatible + with tm. Conversion functions will be provided. +*/ +/* #define USE_TM64 */ + + +/* Availability of system functions. + + HAS_GMTIME_R + Define if your system has gmtime_r() + + HAS_LOCALTIME_R + Define if your system has localtime_r() + + HAS_TIMEGM + Define if your system has timegm(), a GNU extension. +*/ +#if !defined(WIN32) && !defined(_MSC_VER) +#define HAS_GMTIME_R +#define HAS_LOCALTIME_R +#endif +/* #define HAS_TIMEGM */ + + +/* Details of non-standard tm struct elements. 
+ + HAS_TM_TM_GMTOFF + True if your tm struct has a "tm_gmtoff" element. + A BSD extension. + + HAS_TM_TM_ZONE + True if your tm struct has a "tm_zone" element. + A BSD extension. +*/ +/* #define HAS_TM_TM_GMTOFF */ +/* #define HAS_TM_TM_ZONE */ + + +/* USE_SYSTEM_LOCALTIME + USE_SYSTEM_GMTIME + USE_SYSTEM_MKTIME + USE_SYSTEM_TIMEGM + Should we use the system functions if the time is inside their range? + Your system localtime() is probably more accurate, but our gmtime() is + fast and safe. +*/ +#define USE_SYSTEM_LOCALTIME +/* #define USE_SYSTEM_GMTIME */ +#define USE_SYSTEM_MKTIME +/* #define USE_SYSTEM_TIMEGM */ + +#endif /* TIME64_CONFIG_H */ diff --git a/asyncio_mongo/_bson/time64_limits.h b/asyncio_mongo/_bson/time64_limits.h new file mode 100644 index 0000000..fd4455f --- /dev/null +++ b/asyncio_mongo/_bson/time64_limits.h @@ -0,0 +1,95 @@ +/* + Maximum and minimum inputs your system's respective time functions + can correctly handle. time64.h will use your system functions if + the input falls inside these ranges and corresponding USE_SYSTEM_* + constant is defined. 
+*/ + +#ifndef TIME64_LIMITS_H +#define TIME64_LIMITS_H + +/* Max/min for localtime() */ +#define SYSTEM_LOCALTIME_MAX 2147483647 +#define SYSTEM_LOCALTIME_MIN -2147483647-1 + +/* Max/min for gmtime() */ +#define SYSTEM_GMTIME_MAX 2147483647 +#define SYSTEM_GMTIME_MIN -2147483647-1 + +/* Max/min for mktime() */ +static const struct tm SYSTEM_MKTIME_MAX = { + 7, + 14, + 19, + 18, + 0, + 138, + 1, + 17, + 0 +#ifdef HAS_TM_TM_GMTOFF + ,-28800 +#endif +#ifdef HAS_TM_TM_ZONE + ,"PST" +#endif +}; + +static const struct tm SYSTEM_MKTIME_MIN = { + 52, + 45, + 12, + 13, + 11, + 1, + 5, + 346, + 0 +#ifdef HAS_TM_TM_GMTOFF + ,-28800 +#endif +#ifdef HAS_TM_TM_ZONE + ,"PST" +#endif +}; + +/* Max/min for timegm() */ +#ifdef HAS_TIMEGM +static const struct tm SYSTEM_TIMEGM_MAX = { + 7, + 14, + 3, + 19, + 0, + 138, + 2, + 18, + 0 + #ifdef HAS_TM_TM_GMTOFF + ,0 + #endif + #ifdef HAS_TM_TM_ZONE + ,"UTC" + #endif +}; + +static const struct tm SYSTEM_TIMEGM_MIN = { + 52, + 45, + 20, + 13, + 11, + 1, + 5, + 346, + 0 + #ifdef HAS_TM_TM_GMTOFF + ,0 + #endif + #ifdef HAS_TM_TM_ZONE + ,"UTC" + #endif +}; +#endif /* HAS_TIMEGM */ + +#endif /* TIME64_LIMITS_H */ diff --git a/asyncio_mongo/_pymongo/_cmessage.so b/asyncio_mongo/_pymongo/_cmessage.so deleted file mode 100755 index 88c35b3..0000000 Binary files a/asyncio_mongo/_pymongo/_cmessage.so and /dev/null differ diff --git a/asyncio_mongo/_pymongo/_cmessagemodule.c b/asyncio_mongo/_pymongo/_cmessagemodule.c new file mode 100644 index 0000000..80a88ee --- /dev/null +++ b/asyncio_mongo/_pymongo/_cmessagemodule.c @@ -0,0 +1,1265 @@ +/* + * Copyright 2009-2012 10gen, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains C implementations of some of the functions + * needed by the message module. If possible, these implementations + * should be used to speed up message creation. + */ + +#include "Python.h" + +#include "_cbsonmodule.h" +#include "buffer.h" + +struct module_state { + PyObject* _cbson; +}; + +/* See comments about module initialization in _cbsonmodule.c */ +#if PY_MAJOR_VERSION >= 3 +#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) +#else +#define GETSTATE(m) (&_state) +static struct module_state _state; +#endif + +#if PY_MAJOR_VERSION >= 3 +#define BYTES_FORMAT_STRING "y#" +#else +#define BYTES_FORMAT_STRING "s#" +#endif + +#define DOC_TOO_LARGE_FMT "BSON document too large (%d bytes)" \ + " - the connected server supports" \ + " BSON document sizes up to %ld bytes." + +/* Get an error class from the pymongo.errors module. + * + * Returns a new ref */ +static PyObject* _error(char* name) { + PyObject* error; + PyObject* errors = PyImport_ImportModule("pymongo.errors"); + if (!errors) { + return NULL; + } + error = PyObject_GetAttrString(errors, name); + Py_DECREF(errors); + return error; +} + +/* add a lastError message on the end of the buffer. 
+ * returns 0 on failure */ +static int add_last_error(PyObject* self, buffer_t buffer, + int request_id, char* ns, int nslen, PyObject* args) { + struct module_state *state = GETSTATE(self); + + int message_start; + int document_start; + int message_length; + int document_length; + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; + PyObject* one; + char *p = strchr(ns, '.'); + /* Length of the database portion of ns. */ + nslen = p ? (int)(p - ns) : nslen; + + message_start = buffer_save_space(buffer, 4); + if (message_start == -1) { + PyErr_NoMemory(); + return 0; + } + if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || + !buffer_write_bytes(buffer, + "\x00\x00\x00\x00" /* responseTo */ + "\xd4\x07\x00\x00" /* opcode */ + "\x00\x00\x00\x00", /* options */ + 12) || + !buffer_write_bytes(buffer, + ns, nslen) || /* database */ + !buffer_write_bytes(buffer, + ".$cmd\x00" /* collection name */ + "\x00\x00\x00\x00" /* skip */ + "\xFF\xFF\xFF\xFF", /* limit (-1) */ + 14)) { + return 0; + } + + /* save space for length */ + document_start = buffer_save_space(buffer, 4); + if (document_start == -1) { + PyErr_NoMemory(); + return 0; + } + + /* getlasterror: 1 */ + if (!(one = PyLong_FromLong(1))) + return 0; + if (!write_pair(state->_cbson, buffer, "getlasterror", 12, one, 0, 4, 1)) { + Py_DECREF(one); + return 0; + } + Py_DECREF(one); + + /* getlasterror options */ + while (PyDict_Next(args, &pos, &key, &value)) { + if (!decode_and_write_pair(state->_cbson, buffer, key, value, 0, 4, 0)) { + return 0; + } + } + + /* EOD */ + if (!buffer_write_bytes(buffer, "\x00", 1)) { + return 0; + } + + message_length = buffer_get_position(buffer) - message_start; + document_length = buffer_get_position(buffer) - document_start; + memcpy(buffer_get_buffer(buffer) + message_start, &message_length, 4); + memcpy(buffer_get_buffer(buffer) + document_start, &document_length, 4); + return 1; +} + +static int init_insert_buffer(buffer_t buffer, int request_id, int options, 
+ const char* coll_name, int coll_name_len) { + /* Save space for message length */ + int length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyErr_NoMemory(); + return length_location; + } + if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || + !buffer_write_bytes(buffer, + "\x00\x00\x00\x00" + "\xd2\x07\x00\x00", + 8) || + !buffer_write_bytes(buffer, (const char*)&options, 4) || + !buffer_write_bytes(buffer, + coll_name, + coll_name_len + 1)) { + return -1; + } + return length_location; +} + +static PyObject* _cbson_insert_message(PyObject* self, PyObject* args) { + /* Note: As of PyMongo 2.6, this function is no longer used. It + * is being kept (with tests) for backwards compatibility with 3rd + * party libraries that may currently be using it, but will likely + * be removed in a future release. */ + struct module_state *state = GETSTATE(self); + + /* NOTE just using a random number as the request_id */ + int request_id = rand(); + char* collection_name = NULL; + int collection_name_length; + PyObject* docs; + PyObject* doc; + PyObject* iterator; + int before, cur_size, max_size = 0; + int options = 0; + unsigned char check_keys; + unsigned char safe; + unsigned char continue_on_error; + unsigned char uuid_subtype; + PyObject* last_error_args; + buffer_t buffer; + int length_location, message_length; + PyObject* result; + + if (!PyArg_ParseTuple(args, "et#ObbObb", + "utf-8", + &collection_name, + &collection_name_length, + &docs, &check_keys, &safe, + &last_error_args, + &continue_on_error, &uuid_subtype)) { + return NULL; + } + if (continue_on_error) { + options += 1; + } + + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + PyMem_Free(collection_name); + return NULL; + } + + length_location = init_insert_buffer(buffer, + request_id, + options, + collection_name, + collection_name_length); + if (length_location == -1) { + PyMem_Free(collection_name); + buffer_free(buffer); + return NULL; + } + + iterator = 
PyObject_GetIter(docs); + if (iterator == NULL) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "input is not iterable"); + Py_DECREF(InvalidOperation); + } + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + while ((doc = PyIter_Next(iterator)) != NULL) { + before = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + Py_DECREF(doc); + Py_DECREF(iterator); + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + Py_DECREF(doc); + cur_size = buffer_get_position(buffer) - before; + max_size = (cur_size > max_size) ? cur_size : max_size; + } + Py_DECREF(iterator); + + if (PyErr_Occurred()) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + + if (!max_size) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "cannot do an empty bulk insert"); + Py_DECREF(InvalidOperation); + } + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + if (safe) { + if (!add_last_error(self, buffer, request_id, collection_name, + collection_name_length, last_error_args)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + } + + PyMem_Free(collection_name); + + /* objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer), + max_size); + buffer_free(buffer); + return result; +} + +PyDoc_STRVAR(_cbson_insert_message_doc, +"Create an insert message to be sent to MongoDB\n\ +\n\ +Note: As of PyMongo 2.6, this function is no longer used. 
It\n\ +is being kept (with tests) for backwards compatibility with 3rd\n\ +party libraries that may currently be using it, but will likely\n\ +be removed in a future release."); + +static PyObject* _cbson_update_message(PyObject* self, PyObject* args) { + /* NOTE just using a random number as the request_id */ + struct module_state *state = GETSTATE(self); + + int request_id = rand(); + char* collection_name = NULL; + int collection_name_length; + int before, cur_size, max_size = 0; + PyObject* doc; + PyObject* spec; + unsigned char multi; + unsigned char upsert; + unsigned char safe; + unsigned char check_keys; + unsigned char uuid_subtype; + PyObject* last_error_args; + int options; + buffer_t buffer; + int length_location, message_length; + PyObject* result; + + if (!PyArg_ParseTuple(args, "et#bbOObObb", + "utf-8", + &collection_name, + &collection_name_length, + &upsert, &multi, &spec, &doc, &safe, + &last_error_args, &check_keys, &uuid_subtype)) { + return NULL; + } + + options = 0; + if (upsert) { + options += 1; + } + if (multi) { + options += 2; + } + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + PyMem_Free(collection_name); + return NULL; + } + + // save space for message length + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyMem_Free(collection_name); + PyErr_NoMemory(); + return NULL; + } + if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || + !buffer_write_bytes(buffer, + "\x00\x00\x00\x00" + "\xd1\x07\x00\x00" + "\x00\x00\x00\x00", + 12) || + !buffer_write_bytes(buffer, + collection_name, + collection_name_length + 1) || + !buffer_write_bytes(buffer, (const char*)&options, 4)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + + before = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, spec, 0, uuid_subtype, 1)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + max_size = buffer_get_position(buffer) - before; + + 
before = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + cur_size = buffer_get_position(buffer) - before; + max_size = (cur_size > max_size) ? cur_size : max_size; + + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + if (safe) { + if (!add_last_error(self, buffer, request_id, collection_name, + collection_name_length, last_error_args)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + } + + PyMem_Free(collection_name); + + /* objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer), + max_size); + buffer_free(buffer); + return result; +} + +static PyObject* _cbson_query_message(PyObject* self, PyObject* args) { + /* NOTE just using a random number as the request_id */ + struct module_state *state = GETSTATE(self); + + int request_id = rand(); + unsigned int options; + char* collection_name = NULL; + int collection_name_length; + int begin, cur_size, max_size = 0; + int num_to_skip; + int num_to_return; + PyObject* query; + PyObject* field_selector = Py_None; + unsigned char uuid_subtype = 3; + buffer_t buffer; + int length_location, message_length; + PyObject* result; + + if (!PyArg_ParseTuple(args, "Iet#iiO|Ob", + &options, + "utf-8", + &collection_name, + &collection_name_length, + &num_to_skip, &num_to_return, + &query, &field_selector, &uuid_subtype)) { + return NULL; + } + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + PyMem_Free(collection_name); + return NULL; + } + + // save space for message length + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyMem_Free(collection_name); + PyErr_NoMemory(); + return NULL; + } + if (!buffer_write_bytes(buffer, (const char*)&request_id, 
4) || + !buffer_write_bytes(buffer, "\x00\x00\x00\x00\xd4\x07\x00\x00", 8) || + !buffer_write_bytes(buffer, (const char*)&options, 4) || + !buffer_write_bytes(buffer, collection_name, + collection_name_length + 1) || + !buffer_write_bytes(buffer, (const char*)&num_to_skip, 4) || + !buffer_write_bytes(buffer, (const char*)&num_to_return, 4)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + + begin = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, query, 0, uuid_subtype, 1)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + max_size = buffer_get_position(buffer) - begin; + + if (field_selector != Py_None) { + begin = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, field_selector, 0, uuid_subtype, 1)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + cur_size = buffer_get_position(buffer) - begin; + max_size = (cur_size > max_size) ? cur_size : max_size; + } + + PyMem_Free(collection_name); + + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + /* objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING "i", request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer), + max_size); + buffer_free(buffer); + return result; +} + +static PyObject* _cbson_get_more_message(PyObject* self, PyObject* args) { + /* NOTE just using a random number as the request_id */ + int request_id = rand(); + char* collection_name = NULL; + int collection_name_length; + int num_to_return; + long long cursor_id; + buffer_t buffer; + int length_location, message_length; + PyObject* result; + + if (!PyArg_ParseTuple(args, "et#iL", + "utf-8", + &collection_name, + &collection_name_length, + &num_to_return, + &cursor_id)) { + return NULL; + } + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + PyMem_Free(collection_name); + return NULL; + } + + // 
save space for message length + length_location = buffer_save_space(buffer, 4); + if (length_location == -1) { + PyMem_Free(collection_name); + PyErr_NoMemory(); + return NULL; + } + if (!buffer_write_bytes(buffer, (const char*)&request_id, 4) || + !buffer_write_bytes(buffer, + "\x00\x00\x00\x00" + "\xd5\x07\x00\x00" + "\x00\x00\x00\x00", 12) || + !buffer_write_bytes(buffer, + collection_name, + collection_name_length + 1) || + !buffer_write_bytes(buffer, (const char*)&num_to_return, 4) || + !buffer_write_bytes(buffer, (const char*)&cursor_id, 8)) { + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; + } + + PyMem_Free(collection_name); + + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + /* objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer)); + buffer_free(buffer); + return result; +} + +static void +_set_document_too_large(int size, long max) { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { +#if PY_MAJOR_VERSION >= 3 + PyObject* error = PyUnicode_FromFormat(DOC_TOO_LARGE_FMT, size, max); +#else + PyObject* error = PyString_FromFormat(DOC_TOO_LARGE_FMT, size, max); +#endif + if (error) { + PyErr_SetObject(InvalidDocument, error); + Py_DECREF(error); + } + Py_DECREF(InvalidDocument); + } +} + +static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) { + struct module_state *state = GETSTATE(self); + + /* NOTE just using a random number as the request_id */ + int request_id = rand(); + int options = 0; + int length_location, message_length; + int collection_name_length; + char* collection_name = NULL; + PyObject* docs; + PyObject* doc; + PyObject* iterator; + PyObject* client; + PyObject* last_error_args; + PyObject* result; + PyObject* max_bson_size_obj; + PyObject* max_message_size_obj; + PyObject* 
send_message_result; + unsigned char check_keys; + unsigned char safe; + unsigned char continue_on_error; + unsigned char uuid_subtype; + unsigned char empty = 1; + long max_bson_size; + long max_message_size; + buffer_t buffer; + PyObject *exc_type = NULL, *exc_value = NULL, *exc_trace = NULL; + + if (!PyArg_ParseTuple(args, "et#ObbObbO", + "utf-8", + &collection_name, + &collection_name_length, + &docs, &check_keys, &safe, + &last_error_args, + &continue_on_error, + &uuid_subtype, &client)) { + return NULL; + } + if (continue_on_error) { + options += 1; + } + + max_bson_size_obj = PyObject_GetAttrString(client, "max_bson_size"); +#if PY_MAJOR_VERSION >= 3 + max_bson_size = PyLong_AsLong(max_bson_size_obj); +#else + max_bson_size = PyInt_AsLong(max_bson_size_obj); +#endif + Py_XDECREF(max_bson_size_obj); + if (max_bson_size == -1) { + PyMem_Free(collection_name); + return NULL; + } + + max_message_size_obj = PyObject_GetAttrString(client, "max_message_size"); +#if PY_MAJOR_VERSION >= 3 + max_message_size = PyLong_AsLong(max_message_size_obj); +#else + max_message_size = PyInt_AsLong(max_message_size_obj); +#endif + Py_XDECREF(max_message_size_obj); + if (max_message_size == -1) { + PyMem_Free(collection_name); + return NULL; + } + + buffer = buffer_new(); + if (!buffer) { + PyErr_NoMemory(); + PyMem_Free(collection_name); + return NULL; + } + + length_location = init_insert_buffer(buffer, + request_id, + options, + collection_name, + collection_name_length); + if (length_location == -1) { + goto insertfail; + } + + iterator = PyObject_GetIter(docs); + if (iterator == NULL) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "input is not iterable"); + Py_DECREF(InvalidOperation); + } + goto insertfail; + } + while ((doc = PyIter_Next(iterator)) != NULL) { + int before = buffer_get_position(buffer); + int cur_size; + empty = 0; + if (!write_dict(state->_cbson, buffer, doc, check_keys, 
uuid_subtype, 1)) { + Py_DECREF(doc); + goto iterfail; + } + Py_DECREF(doc); + + cur_size = buffer_get_position(buffer) - before; + if (cur_size > max_bson_size) { + _set_document_too_large(cur_size, max_bson_size); + goto iterfail; + } + + /* We have enough data, send this batch. */ + if (buffer_get_position(buffer) > max_message_size) { + int new_request_id = rand(); + int message_start; + PyObject* send_gle = Py_False; + buffer_t new_buffer = buffer_new(); + if (!new_buffer) { + PyErr_NoMemory(); + goto iterfail; + } + message_start = init_insert_buffer(new_buffer, + new_request_id, + options, + collection_name, + collection_name_length); + if (message_start == -1) { + buffer_free(new_buffer); + goto iterfail; + } + + /* Copy the overflow encoded document into the new buffer. */ + if (!buffer_write_bytes(new_buffer, + (const char*)buffer_get_buffer(buffer) + before, cur_size)) { + buffer_free(new_buffer); + goto iterfail; + } + + /* Roll back to the beginning of this document. */ + buffer_update_position(buffer, before); + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + /* If we are doing unacknowledged writes *and* continue_on_error + * is True it's pointless (and slower) to send GLE. 
*/ + if (safe || !continue_on_error) { + send_gle = Py_True; + if (!add_last_error(self, buffer, request_id, collection_name, + collection_name_length, last_error_args)) { + buffer_free(new_buffer); + goto iterfail; + } + } + /* Objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer)); + buffer_free(buffer); + buffer = new_buffer; + request_id = new_request_id; + length_location = message_start; + + send_message_result = PyObject_CallMethod(client, "_send_message", + "NO", result, send_gle); + + if (!send_message_result) { + PyObject *etype = NULL, *evalue = NULL, *etrace = NULL; + PyObject* OperationFailure; + PyErr_Fetch(&etype, &evalue, &etrace); + OperationFailure = _error("OperationFailure"); + if (OperationFailure) { + if (PyErr_GivenExceptionMatches(etype, OperationFailure)) { + if (!safe || continue_on_error) { + Py_DECREF(OperationFailure); + if (!safe) { + /* We're doing unacknowledged writes and + * continue_on_error is False. Just return. */ + Py_DECREF(etype); + Py_XDECREF(evalue); + Py_XDECREF(etrace); + Py_DECREF(iterator); + buffer_free(buffer); + PyMem_Free(collection_name); + Py_RETURN_NONE; + } + /* continue_on_error is True, store the error + * details to re-raise after the final batch */ + Py_XDECREF(exc_type); + Py_XDECREF(exc_value); + Py_XDECREF(exc_trace); + exc_type = etype; + exc_value = evalue; + exc_trace = etrace; + continue; + } + } + Py_DECREF(OperationFailure); + } + /* This isn't OperationFailure, we couldn't + * import OperationFailure, or we are doing + * acknowledged writes. Re-raise immediately. 
*/ + PyErr_Restore(etype, evalue, etrace); + goto iterfail; + } else { + Py_DECREF(send_message_result); + } + } + } + Py_DECREF(iterator); + + if (PyErr_Occurred()) { + goto insertfail; + } + + if (empty) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "cannot do an empty bulk insert"); + Py_DECREF(InvalidOperation); + } + goto insertfail; + } + + message_length = buffer_get_position(buffer) - length_location; + memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4); + + if (safe) { + if (!add_last_error(self, buffer, request_id, collection_name, + collection_name_length, last_error_args)) { + goto insertfail; + } + } + + PyMem_Free(collection_name); + + /* objectify buffer */ + result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer)); + buffer_free(buffer); + + /* Send the last (or only) batch */ + send_message_result = PyObject_CallMethod(client, "_send_message", "NN", + result, + PyBool_FromLong((long)safe)); + + if (!send_message_result) { + Py_XDECREF(exc_type); + Py_XDECREF(exc_value); + Py_XDECREF(exc_trace); + return NULL; + } else { + Py_DECREF(send_message_result); + } + + if (exc_type) { + /* Re-raise any previously stored exception + * due to continue_on_error being True */ + PyErr_Restore(exc_type, exc_value, exc_trace); + return NULL; + } + + Py_RETURN_NONE; + +iterfail: + Py_DECREF(iterator); +insertfail: + Py_XDECREF(exc_type); + Py_XDECREF(exc_value); + Py_XDECREF(exc_trace); + buffer_free(buffer); + PyMem_Free(collection_name); + return NULL; +} + +static PyObject* +_send_write_command(PyObject* client, buffer_t buffer, + int lst_len_loc, int cmd_len_loc, unsigned char* errors) { + + PyObject* msg; + PyObject* result; + PyObject* ok; + + int request_id = rand(); + int position = buffer_get_position(buffer); + int length = position - lst_len_loc - 1; + memcpy(buffer_get_buffer(buffer) + 
lst_len_loc, &length, 4); + length = position - cmd_len_loc; + memcpy(buffer_get_buffer(buffer) + cmd_len_loc, &length, 4); + memcpy(buffer_get_buffer(buffer), &position, 4); + memcpy(buffer_get_buffer(buffer) + 4, &request_id, 4); + + /* objectify buffer */ + msg = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id, + buffer_get_buffer(buffer), + buffer_get_position(buffer)); + if (!msg) + return NULL; + + /* Send the current batch */ + result = PyObject_CallMethod(client, "_send_message", + "NOO", msg, Py_True, Py_True); + if (result && PyDict_GetItemString(result, "writeErrors")) + *errors = 1; + return result; +} + +static buffer_t +_command_buffer_new(char* ns, int ns_len) { + buffer_t buffer; + if (!(buffer = buffer_new())) { + PyErr_NoMemory(); + return NULL; + } + /* Save space for message length and request id */ + if ((buffer_save_space(buffer, 8)) == -1) { + PyErr_NoMemory(); + buffer_free(buffer); + return NULL; + } + if (!buffer_write_bytes(buffer, + "\x00\x00\x00\x00" /* responseTo */ + "\xd4\x07\x00\x00" /* opcode */ + "\x00\x00\x00\x00", /* options */ + 12) || + !buffer_write_bytes(buffer, + ns, ns_len + 1) || /* namespace */ + !buffer_write_bytes(buffer, + "\x00\x00\x00\x00" /* skip */ + "\xFF\xFF\xFF\xFF", /* limit (-1) */ + 8)) { + buffer_free(buffer); + return NULL; + } + return buffer; +} + +#define _INSERT 0 +#define _UPDATE 1 +#define _DELETE 2 + +static PyObject* +_cbson_do_batched_write_command(PyObject* self, PyObject* args) { + struct module_state *state = GETSTATE(self); + + long max_bson_size; + long max_cmd_size; + long idx_offset = 0; + int idx = 0; + int cmd_len_loc; + int lst_len_loc; + int ns_len; + int ordered; + char *ns = NULL; + PyObject* max_bson_size_obj; + PyObject* command; + PyObject* doc; + PyObject* docs; + PyObject* client; + PyObject* iterator; + PyObject* result; + PyObject* results; + unsigned char op; + unsigned char check_keys; + unsigned char uuid_subtype; + unsigned char empty = 1; + unsigned char errors = 0; + 
buffer_t buffer; + + if (!PyArg_ParseTuple(args, "et#bOObbO", "utf-8", + &ns, &ns_len, &op, &command, &docs, + &check_keys, &uuid_subtype, &client)) { + return NULL; + } + + max_bson_size_obj = PyObject_GetAttrString(client, "max_bson_size"); +#if PY_MAJOR_VERSION >= 3 + max_bson_size = PyLong_AsLong(max_bson_size_obj); +#else + max_bson_size = PyInt_AsLong(max_bson_size_obj); +#endif + Py_XDECREF(max_bson_size_obj); + if (max_bson_size == -1) { + PyMem_Free(ns); + return NULL; + } + /* + * Max BSON object size + 16k - 2 bytes for ending NUL bytes + * XXX: This should come from the server - SERVER-10643 + */ + max_cmd_size = max_bson_size + 16382; + + /* Default to True */ + ordered = !((PyDict_GetItemString(command, "ordered")) == Py_False); + + if (!(results = PyList_New(0))) { + PyMem_Free(ns); + return NULL; + } + + if (!(buffer = _command_buffer_new(ns, ns_len))) { + PyMem_Free(ns); + Py_DECREF(results); + return NULL; + } + + PyMem_Free(ns); + + /* Position of command document length */ + cmd_len_loc = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, command, 0, uuid_subtype, 0)) { + goto cmdfail; + } + + /* Write type byte for array */ + *(buffer_get_buffer(buffer) + (buffer_get_position(buffer) - 1)) = 0x4; + + switch (op) { + case _INSERT: + { + if (!buffer_write_bytes(buffer, "documents\x00", 10)) + goto cmdfail; + break; + } + case _UPDATE: + { + /* MongoDB does key validation for update. */ + check_keys = 0; + if (!buffer_write_bytes(buffer, "updates\x00", 8)) + goto cmdfail; + break; + } + case _DELETE: + { + /* Never check keys in a delete command. 
*/ + check_keys = 0; + if (!buffer_write_bytes(buffer, "deletes\x00", 8)) + goto cmdfail; + break; + } + default: + { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "Unknown command"); + Py_DECREF(InvalidOperation); + } + goto cmdfail; + } + } + + /* Save space for list document */ + lst_len_loc = buffer_save_space(buffer, 4); + if (lst_len_loc == -1) { + PyErr_NoMemory(); + goto cmdfail; + } + + iterator = PyObject_GetIter(docs); + if (iterator == NULL) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "input is not iterable"); + Py_DECREF(InvalidOperation); + } + goto cmdfail; + } + while ((doc = PyIter_Next(iterator)) != NULL) { + int sub_doc_begin = buffer_get_position(buffer); + int cur_doc_begin; + int cur_size; + char key[16]; + empty = 0; + INT2STRING(key, idx); + if (!buffer_write_bytes(buffer, "\x03", 1) || + !buffer_write_bytes(buffer, key, (int)strlen(key) + 1)) { + Py_DECREF(doc); + goto cmditerfail; + } + cur_doc_begin = buffer_get_position(buffer); + if (!write_dict(state->_cbson, buffer, doc, + check_keys, uuid_subtype, 1)) { + Py_DECREF(doc); + goto cmditerfail; + } + Py_DECREF(doc); + + /* We have enough data, maybe send this batch. */ + if (buffer_get_position(buffer) > max_cmd_size) { + buffer_t new_buffer; + cur_size = buffer_get_position(buffer) - cur_doc_begin; + + /* This single document is too large for the command. */ + if (!idx) { + if (op == _INSERT) { + _set_document_too_large(cur_size, max_bson_size); + } else { + PyObject* InvalidDocument = _error("InvalidDocument"); + if (InvalidDocument) { + /* + * There's nothing intelligent we can say + * about size for update and remove. 
+ */ + PyErr_SetString(InvalidDocument, + "command document too large"); + Py_DECREF(InvalidDocument); + } + } + goto cmditerfail; + } + + if (!(new_buffer = buffer_new())) { + PyErr_NoMemory(); + goto cmditerfail; + } + /* New buffer including the current overflow document */ + if (!buffer_write_bytes(new_buffer, + (const char*)buffer_get_buffer(buffer), lst_len_loc + 5) || + !buffer_write_bytes(new_buffer, "0\x00", 2) || + !buffer_write_bytes(new_buffer, + (const char*)buffer_get_buffer(buffer) + cur_doc_begin, cur_size)) { + buffer_free(new_buffer); + goto cmditerfail; + } + /* + * Roll the existing buffer back to the beginning + * of the last document encoded. + */ + buffer_update_position(buffer, sub_doc_begin); + + if (!buffer_write_bytes(buffer, "\x00\x00", 2)) + goto cmditerfail; + + result = _send_write_command(client, buffer, + lst_len_loc, cmd_len_loc, &errors); + + buffer_free(buffer); + buffer = new_buffer; + + if (!result) + goto cmditerfail; + +#if PY_MAJOR_VERSION >= 3 + result = Py_BuildValue("NN", + PyLong_FromLong(idx_offset), result); +#else + result = Py_BuildValue("NN", + PyInt_FromLong(idx_offset), result); +#endif + if (!result) + goto cmditerfail; + + PyList_Append(results, result); + Py_DECREF(result); + + if (errors && ordered) { + Py_DECREF(iterator); + buffer_free(buffer); + return results; + } + idx_offset += idx; + idx = 0; + } + idx += 1; + } + Py_DECREF(iterator); + + if (PyErr_Occurred()) { + goto cmdfail; + } + + if (empty) { + PyObject* InvalidOperation = _error("InvalidOperation"); + if (InvalidOperation) { + PyErr_SetString(InvalidOperation, "cannot do an empty bulk write"); + Py_DECREF(InvalidOperation); + } + goto cmdfail; + } + + if (!buffer_write_bytes(buffer, "\x00\x00", 2)) + goto cmdfail; + + result = _send_write_command(client, buffer, + lst_len_loc, cmd_len_loc, &errors); + if (!result) + goto cmdfail; + +#if PY_MAJOR_VERSION >= 3 + result = Py_BuildValue("NN", PyLong_FromLong(idx_offset), result); +#else + result = 
 Py_BuildValue("NN", PyInt_FromLong(idx_offset), result); +#endif + if (!result) + goto cmdfail; + + buffer_free(buffer); + + PyList_Append(results, result); + Py_DECREF(result); + return results; + +cmditerfail: + Py_DECREF(iterator); +cmdfail: + Py_DECREF(results); + buffer_free(buffer); + return NULL; +} + +static PyMethodDef _CMessageMethods[] = { + {"_insert_message", _cbson_insert_message, METH_VARARGS, + _cbson_insert_message_doc}, + {"_update_message", _cbson_update_message, METH_VARARGS, + "create an update message to be sent to MongoDB"}, + {"_query_message", _cbson_query_message, METH_VARARGS, + "create a query message to be sent to MongoDB"}, + {"_get_more_message", _cbson_get_more_message, METH_VARARGS, + "create a get more message to be sent to MongoDB"}, + {"_do_batched_insert", _cbson_do_batched_insert, METH_VARARGS, + "insert a batch of documents, splitting the batch as needed"}, + {"_do_batched_write_command", _cbson_do_batched_write_command, METH_VARARGS, + "execute a batch of insert, update, or delete commands"}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +#define INITERROR return NULL +static int _cmessage_traverse(PyObject *m, visitproc visit, void *arg) { + Py_VISIT(GETSTATE(m)->_cbson); + return 0; +} + +static int _cmessage_clear(PyObject *m) { + Py_CLEAR(GETSTATE(m)->_cbson); + return 0; +} + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_cmessage", + NULL, + sizeof(struct module_state), + _CMessageMethods, + NULL, + _cmessage_traverse, + _cmessage_clear, + NULL +}; + +PyMODINIT_FUNC +PyInit__cmessage(void) +#else +#define INITERROR return +PyMODINIT_FUNC +init_cmessage(void) +#endif +{ + PyObject *_cbson; + PyObject *c_api_object; + PyObject *m; + struct module_state *state; + + /* Store a reference to the _cbson module since it's needed to call some + * of its functions + */ + _cbson = PyImport_ImportModule("asyncio_mongo._bson._cbson"); + if (_cbson == NULL) { + INITERROR; + } + + /* Import C API of _cbson + * The 
header file accesses _cbson_API to call the functions + */ + c_api_object = PyObject_GetAttrString(_cbson, "_C_API"); + if (c_api_object == NULL) { + Py_DECREF(_cbson); + INITERROR; + } +#if PY_VERSION_HEX >= 0x03010000 + _cbson_API = (void **)PyCapsule_GetPointer(c_api_object, "_cbson._C_API"); +#else + _cbson_API = (void **)PyCObject_AsVoidPtr(c_api_object); +#endif + if (_cbson_API == NULL) { + Py_DECREF(c_api_object); + Py_DECREF(_cbson); + INITERROR; + } + +#if PY_MAJOR_VERSION >= 3 + m = PyModule_Create(&moduledef); +#else + m = Py_InitModule("_cmessage", _CMessageMethods); +#endif + if (m == NULL) { + Py_DECREF(c_api_object); + Py_DECREF(_cbson); + INITERROR; + } + + state = GETSTATE(m); + state->_cbson = _cbson; + + Py_DECREF(c_api_object); + +#if PY_MAJOR_VERSION >= 3 + return m; +#endif +} diff --git a/setup.py b/setup.py index 026ab0a..948199a 100755 --- a/setup.py +++ b/setup.py @@ -12,11 +12,16 @@ from distutils.errors import CCompilerError from distutils.errors import DistutilsPlatformError, DistutilsExecError from distutils.core import Extension -requirements = ["asyncio"] + +requirements = [] try: import xml.etree.ElementTree except ImportError: requirements.append("elementtree") +try: + import asyncio +except ImportError: + requirements.append("asyncio") if sys.platform == 'win32' and sys.version_info > (2, 6): @@ -69,11 +74,16 @@ although they do result in significant speed improvements. 
c_ext = Feature( "optional C extension", standard=True, - ext_modules=[Extension('txmongo._pymongo._cbson', - include_dirs=['txmongo/_pymongo'], - sources=['txmongo/_pymongo/_cbsonmodule.c', - 'txmongo/_pymongo/time_helpers.c', - 'txmongo/_pymongo/encoding_helpers.c'])]) + ext_modules=[Extension('asyncio_mongo._bson._cbson', + include_dirs=['asyncio_mongo/_bson'], + sources=['asyncio_mongo/_bson/_cbsonmodule.c', + 'asyncio_mongo/_bson/time64.c', + 'asyncio_mongo/_bson/buffer.c', + 'asyncio_mongo/_bson/encoding_helpers.c']), + Extension('asyncio_mongo._pymongo._cmessage', + include_dirs=['asyncio_mongo/_bson'], + sources=['asyncio_mongo/_pymongo/_cmessagemodule.c', + 'asyncio_mongo/_bson/buffer.c'])]) if "--no_ext" in sys.argv: sys.argv = [x for x in sys.argv if x != "--no_ext"] @@ -83,13 +93,13 @@ else: setup( name="asyncio-mongo", - version="0.1.0", + version="0.1-dev", description="Asynchronous Python 3.3+ driver for MongoDB ", author="Alexandre Fiori, Don Brown", author_email="mrdon@twdata.org", url="https://bitbucket.org/mrdon/asyncio-mongo", keywords=["mongo", "mongodb", "pymongo", "gridfs", "asyncio_mongo", "asyncio"], - packages=["asyncio_mongo", "asyncio_mongo._pymongo", "asyncio_mongo._gridfs", "asyncio_mongo._bson"], + packages=["asyncio_mongo", "asyncio_mongo._pymongo", "asyncio_mongo._bson"], install_requires=requirements, features=features, license="Apache License, Version 2.0",