/* * Copyright 2009-2012 10gen, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * This file contains C implementations of some of the functions * needed by the bson module. If possible, these implementations * should be used to speed up BSON encoding and decoding. */ #include "Python.h" #include "datetime.h" #include "buffer.h" #include "time64.h" #include "encoding_helpers.h" #define _CBSON_MODULE #include "_cbsonmodule.h" /* New module state and initialization code. * See the module-initialization-and-state * section in the following doc: * http://docs.python.org/release/3.1.3/howto/cporting.html * which references the following pep: * http://www.python.org/dev/peps/pep-3121/ * */ struct module_state { PyObject* Binary; PyObject* Code; PyObject* ObjectId; PyObject* DBRef; PyObject* RECompile; PyObject* Regex; PyObject* UUID; PyObject* Timestamp; PyObject* MinKey; PyObject* MaxKey; PyObject* UTC; PyTypeObject* REType; }; /* The Py_TYPE macro was introduced in CPython 2.6 */ #ifndef Py_TYPE #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) #endif #if PY_MAJOR_VERSION >= 3 #define GETSTATE(m) ((struct module_state*)PyModule_GetState(m)) #else #define GETSTATE(m) (&_state) static struct module_state _state; #endif /* Maximum number of regex flags */ #define FLAGS_SIZE 7 #define JAVA_LEGACY 5 #define CSHARP_LEGACY 6 #define BSON_MAX_SIZE 2147483647 /* The smallest possible BSON document, i.e. "{}" */ #define BSON_MIN_SIZE 5 /* Get an error class from the bson.errors module. 
* * Returns a new ref */ static PyObject* _error(char* name) { PyObject* error; PyObject* errors = PyImport_ImportModule("bson.errors"); if (!errors) { return NULL; } error = PyObject_GetAttrString(errors, name); Py_DECREF(errors); return error; } /* Safely downcast from Py_ssize_t to int, setting an * exception and returning -1 on error. */ static int _downcast_and_check(Py_ssize_t size, int extra) { if (size > BSON_MAX_SIZE || ((BSON_MAX_SIZE - extra) < size)) { PyObject* InvalidStringData = _error("InvalidStringData"); if (InvalidStringData) { PyErr_SetString(InvalidStringData, "String length must be <= 2147483647"); Py_DECREF(InvalidStringData); } return -1; } return (int)size + extra; } static PyObject* elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, unsigned char uuid_subtype, unsigned char compile_re); static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype); /* Date stuff */ static PyObject* datetime_from_millis(long long millis) { /* To encode a datetime instance like datetime(9999, 12, 31, 23, 59, 59, 999999) * we follow these steps: * 1. Calculate a timestamp in seconds: 253402300799 * 2. Multiply that by 1000: 253402300799000 * 3. Add in microseconds divided by 1000 253402300799999 * * (Note: BSON doesn't support microsecond accuracy, hence the rounding.) * * To decode we could do: * 1. Get seconds: timestamp / 1000: 253402300799 * 2. Get micros: (timestamp % 1000) * 1000: 999000 * Resulting in datetime(9999, 12, 31, 23, 59, 59, 999000) -- the expected result * * Now what if the we encode (1, 1, 1, 1, 1, 1, 111111)? * 1. and 2. gives: -62135593139000 * 3. Gives us: -62135593138889 * * Now decode: * 1. Gives us: -62135593138 * 2. 
Gives us: -889000 * Resulting in datetime(1, 1, 1, 1, 1, 2, 15888216) -- an invalid result * * If instead to decode we do: * diff = ((millis % 1000) + 1000) % 1000: 111 * seconds = (millis - diff) / 1000: -62135593139 * micros = diff * 1000 111000 * Resulting in datetime(1, 1, 1, 1, 1, 1, 111000) -- the expected result */ int diff = (int)(((millis % 1000) + 1000) % 1000); int microseconds = diff * 1000; Time64_T seconds = (millis - diff) / 1000; struct TM timeinfo; gmtime64_r(&seconds, &timeinfo); return PyDateTime_FromDateAndTime(timeinfo.tm_year + 1900, timeinfo.tm_mon + 1, timeinfo.tm_mday, timeinfo.tm_hour, timeinfo.tm_min, timeinfo.tm_sec, microseconds); } static long long millis_from_datetime(PyObject* datetime) { struct TM timeinfo; long long millis; timeinfo.tm_year = PyDateTime_GET_YEAR(datetime) - 1900; timeinfo.tm_mon = PyDateTime_GET_MONTH(datetime) - 1; timeinfo.tm_mday = PyDateTime_GET_DAY(datetime); timeinfo.tm_hour = PyDateTime_DATE_GET_HOUR(datetime); timeinfo.tm_min = PyDateTime_DATE_GET_MINUTE(datetime); timeinfo.tm_sec = PyDateTime_DATE_GET_SECOND(datetime); millis = timegm64(&timeinfo) * 1000; millis += PyDateTime_DATE_GET_MICROSECOND(datetime) / 1000; return millis; } /* Just make this compatible w/ the old API. 
*/ int buffer_write_bytes(buffer_t buffer, const char* data, int size) { if (buffer_write(buffer, data, size)) { PyErr_NoMemory(); return 0; } return 1; } static int write_unicode(buffer_t buffer, PyObject* py_string) { int size; const char* data; PyObject* encoded = PyUnicode_AsUTF8String(py_string); if (!encoded) { return 0; } #if PY_MAJOR_VERSION >= 3 data = PyBytes_AS_STRING(encoded); #else data = PyString_AS_STRING(encoded); #endif if (!data) goto unicodefail; #if PY_MAJOR_VERSION >= 3 if ((size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1)) == -1) #else if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) #endif goto unicodefail; if (!buffer_write_bytes(buffer, (const char*)&size, 4)) goto unicodefail; if (!buffer_write_bytes(buffer, data, size)) goto unicodefail; Py_DECREF(encoded); return 1; unicodefail: Py_DECREF(encoded); return 0; } /* returns 0 on failure */ static int write_string(buffer_t buffer, PyObject* py_string) { int size; const char* data; #if PY_MAJOR_VERSION >= 3 if (PyUnicode_Check(py_string)){ return write_unicode(buffer, py_string); } data = PyBytes_AsString(py_string); #else data = PyString_AsString(py_string); #endif if (!data) { return 0; } #if PY_MAJOR_VERSION >= 3 if ((size = _downcast_and_check(PyBytes_Size(py_string), 1)) == -1) #else if ((size = _downcast_and_check(PyString_Size(py_string), 1)) == -1) #endif return 0; if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { return 0; } if (!buffer_write_bytes(buffer, data, size)) { return 0; } return 1; } /* * Are we in the main interpreter or a sub-interpreter? * Useful for deciding if we can use cached pure python * types in mod_wsgi. 
*/
static int _in_main_interpreter(void) {
    /* Resolved on the first call and reused afterwards. */
    static PyInterpreterState* main_interpreter = NULL;

    if (main_interpreter == NULL) {
        /* Follow the interpreter list to its last entry; that entry is
         * the one treated as the main interpreter. */
        PyInterpreterState* current = PyInterpreterState_Head();
        PyInterpreterState* following;

        for (following = PyInterpreterState_Next(current);
             following;
             following = PyInterpreterState_Next(current)) {
            current = following;
        }
        main_interpreter = current;
    }

    return (PyThreadState_Get()->interp == main_interpreter);
}

/*
 * Hand back a new reference to a pure python type. In the main
 * interpreter the cached `object` is used (NULL is passed straight
 * through); anywhere else `object_name` is looked up afresh in
 * `module_name`. Returns NULL if that import or lookup fails.
 */
static PyObject* _get_object(PyObject* object, char* module_name,
                             char* object_name) {
    PyObject* module;
    PyObject* fetched;

    if (_in_main_interpreter()) {
        Py_XINCREF(object);
        return object;
    }

    module = PyImport_ImportModule(module_name);
    if (module == NULL) {
        return NULL;
    }
    fetched = PyObject_GetAttrString(module, object_name);
    Py_DECREF(module);
    return fetched;
}

/* Import `module_name` and store its `object_name` attribute in *object.
 *
 * Returns 0 on success, 1 if the import failed (*object is left
 * untouched) and 2 if the attribute lookup failed (*object is NULL). */
static int _load_object(PyObject** object, char* module_name,
                        char* object_name) {
    PyObject* module = PyImport_ImportModule(module_name);

    if (module == NULL) {
        return 1;
    }

    *object = PyObject_GetAttrString(module, object_name);
    Py_DECREF(module);

    if (*object == NULL) {
        return 2;
    }
    return 0;
}

/* Load all Python objects to cache.
 *
 * Returns non-zero on failure.
*/ static int _load_python_objects(PyObject* module) { PyObject* empty_string; PyObject* compiled; struct module_state *state = GETSTATE(module); if (_load_object(&state->Binary, "bson.binary", "Binary") || _load_object(&state->Code, "bson.code", "Code") || _load_object(&state->ObjectId, "bson.objectid", "ObjectId") || _load_object(&state->DBRef, "bson.dbref", "DBRef") || _load_object(&state->Timestamp, "bson.timestamp", "Timestamp") || _load_object(&state->MinKey, "bson.min_key", "MinKey") || _load_object(&state->MaxKey, "bson.max_key", "MaxKey") || _load_object(&state->UTC, "bson.tz_util", "utc") || _load_object(&state->RECompile, "re", "compile") || _load_object(&state->Regex, "bson.regex", "Regex")) { return 1; } /* If we couldn't import uuid then we must be on 2.4. Just ignore. */ if (_load_object(&state->UUID, "uuid", "UUID") == 1) { state->UUID = NULL; PyErr_Clear(); } /* Reload our REType hack too. */ #if PY_MAJOR_VERSION >= 3 empty_string = PyBytes_FromString(""); #else empty_string = PyString_FromString(""); #endif if (empty_string == NULL) { state->REType = NULL; return 1; } compiled = PyObject_CallFunction(state->RECompile, "O", empty_string); if (compiled == NULL) { state->REType = NULL; Py_DECREF(empty_string); return 1; } Py_INCREF(Py_TYPE(compiled)); state->REType = Py_TYPE(compiled); Py_DECREF(empty_string); Py_DECREF(compiled); return 0; } static int write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype) { int result; if(Py_EnterRecursiveCall(" while encoding an object to BSON ")) return 0; result = _write_element_to_buffer(self, buffer, type_byte, value, check_keys, uuid_subtype); Py_LeaveRecursiveCall(); return result; } static void _fix_java(const char* in, char* out) { int i, j; for (i = 0, j = 7; i < j; i++, j--) { out[i] = in[j]; out[j] = in[i]; } for (i = 8, j = 15; i < j; i++, j--) { out[i] = in[j]; out[j] = in[i]; } } static void 
_set_cannot_encode(PyObject* value) { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyObject* repr = PyObject_Repr(value); if (repr) { #if PY_MAJOR_VERSION >= 3 PyObject* errmsg = PyUnicode_FromString("Cannot encode object: "); #else PyObject* errmsg = PyString_FromString("Cannot encode object: "); #endif if (errmsg) { #if PY_MAJOR_VERSION >= 3 PyObject* error = PyUnicode_Concat(errmsg, repr); if (error) { PyErr_SetObject(InvalidDocument, error); Py_DECREF(error); } Py_DECREF(errmsg); Py_DECREF(repr); #else PyString_ConcatAndDel(&errmsg, repr); if (errmsg) { PyErr_SetObject(InvalidDocument, errmsg); Py_DECREF(errmsg); } #endif } else { Py_DECREF(repr); } } Py_DECREF(InvalidDocument); } } /* * Encode a builtin Python regular expression or our custom Regex class. * * Sets exception and returns 0 on failure. */ static int _write_regex_to_buffer( buffer_t buffer, int type_byte, PyObject* value) { PyObject* py_flags; PyObject* py_pattern; PyObject* encoded_pattern; long int_flags; char flags[FLAGS_SIZE]; char check_utf8 = 0; const char* pattern_data; int pattern_length, flags_length; result_t status; /* * Both the builtin re type and our Regex class have attributes * "flags" and "pattern". 
*/ py_flags = PyObject_GetAttrString(value, "flags"); if (!py_flags) { return 0; } #if PY_MAJOR_VERSION >= 3 int_flags = PyLong_AsLong(py_flags); #else int_flags = PyInt_AsLong(py_flags); #endif Py_DECREF(py_flags); py_pattern = PyObject_GetAttrString(value, "pattern"); if (!py_pattern) { return 0; } if (PyUnicode_Check(py_pattern)) { encoded_pattern = PyUnicode_AsUTF8String(py_pattern); Py_DECREF(py_pattern); if (!encoded_pattern) { return 0; } } else { encoded_pattern = py_pattern; check_utf8 = 1; } #if PY_MAJOR_VERSION >= 3 if (!(pattern_data = PyBytes_AsString(encoded_pattern))) { Py_DECREF(encoded_pattern); return 0; } if ((pattern_length = _downcast_and_check(PyBytes_Size(encoded_pattern), 0)) == -1) { Py_DECREF(encoded_pattern); return 0; } #else if (!(pattern_data = PyString_AsString(encoded_pattern))) { Py_DECREF(encoded_pattern); return 0; } if ((pattern_length = _downcast_and_check(PyString_Size(encoded_pattern), 0)) == -1) { Py_DECREF(encoded_pattern); return 0; } #endif status = check_string((const unsigned char*)pattern_data, pattern_length, check_utf8, 1); if (status == NOT_UTF_8) { PyObject* InvalidStringData = _error("InvalidStringData"); if (InvalidStringData) { PyErr_SetString(InvalidStringData, "regex patterns must be valid UTF-8"); Py_DECREF(InvalidStringData); } Py_DECREF(encoded_pattern); return 0; } else if (status == HAS_NULL) { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyErr_SetString(InvalidDocument, "regex patterns must not contain the NULL byte"); Py_DECREF(InvalidDocument); } Py_DECREF(encoded_pattern); return 0; } if (!buffer_write_bytes(buffer, pattern_data, pattern_length + 1)) { Py_DECREF(encoded_pattern); return 0; } Py_DECREF(encoded_pattern); flags[0] = 0; if (int_flags & 2) { STRCAT(flags, FLAGS_SIZE, "i"); } if (int_flags & 4) { STRCAT(flags, FLAGS_SIZE, "l"); } if (int_flags & 8) { STRCAT(flags, FLAGS_SIZE, "m"); } if (int_flags & 16) { STRCAT(flags, FLAGS_SIZE, "s"); } if (int_flags & 32) 
{ STRCAT(flags, FLAGS_SIZE, "u"); } if (int_flags & 64) { STRCAT(flags, FLAGS_SIZE, "x"); } flags_length = (int)strlen(flags) + 1; if (!buffer_write_bytes(buffer, flags, flags_length)) { return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x0B; return 1; } /* TODO our platform better be little-endian w/ 4-byte ints! */ /* Write a single value to the buffer (also write its type_byte, for which * space has already been reserved. * * returns 0 on failure */ static int _write_element_to_buffer(PyObject* self, buffer_t buffer, int type_byte, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype) { struct module_state *state = GETSTATE(self); /* * Don't use PyObject_IsInstance for our custom types. It causes * problems with python sub interpreters. Our custom types should * have a _type_marker attribute, which we can switch on instead. */ if (PyObject_HasAttrString(value, "_type_marker")) { long type; PyObject* type_marker = PyObject_GetAttrString(value, "_type_marker"); if (type_marker == NULL) return 0; #if PY_MAJOR_VERSION >= 3 type = PyLong_AsLong(type_marker); #else type = PyInt_AsLong(type_marker); #endif Py_DECREF(type_marker); /* * Py(Long|Int)_AsLong returns -1 for error but -1 is a valid value * so we call PyErr_Occurred to differentiate. * * One potential reason for an error is the user passing an invalid * type that overrides __getattr__ (e.g. 
pymongo.collection.Collection) */ if (type == -1 && PyErr_Occurred()) { PyErr_Clear(); _set_cannot_encode(value); return 0; } switch (type) { case 5: { /* Binary */ PyObject* subtype_object; long subtype; const char* data; int size; *(buffer_get_buffer(buffer) + type_byte) = 0x05; subtype_object = PyObject_GetAttrString(value, "subtype"); if (!subtype_object) { return 0; } #if PY_MAJOR_VERSION >= 3 subtype = PyLong_AsLong(subtype_object); #else subtype = PyInt_AsLong(subtype_object); #endif if (subtype == -1) { Py_DECREF(subtype_object); return 0; } #if PY_MAJOR_VERSION >= 3 size = _downcast_and_check(PyBytes_Size(value), 0); #else size = _downcast_and_check(PyString_Size(value), 0); #endif if (size == -1) { Py_DECREF(subtype_object); return 0; } Py_DECREF(subtype_object); if (subtype == 2) { #if PY_MAJOR_VERSION >= 3 int other_size = _downcast_and_check(PyBytes_Size(value), 4); #else int other_size = _downcast_and_check(PyString_Size(value), 4); #endif if (other_size == -1) return 0; if (!buffer_write_bytes(buffer, (const char*)&other_size, 4)) { return 0; } if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { return 0; } } if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { return 0; } if (subtype != 2) { if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { return 0; } } #if PY_MAJOR_VERSION >= 3 data = PyBytes_AsString(value); #else data = PyString_AsString(value); #endif if (!data) { return 0; } if (!buffer_write_bytes(buffer, data, size)) { return 0; } return 1; } case 7: { /* ObjectId */ const char* data; PyObject* pystring = PyObject_GetAttrString(value, "_ObjectId__id"); if (!pystring) { return 0; } #if PY_MAJOR_VERSION >= 3 data = PyBytes_AsString(pystring); #else data = PyString_AsString(pystring); #endif if (!data) { Py_DECREF(pystring); return 0; } if (!buffer_write_bytes(buffer, data, 12)) { Py_DECREF(pystring); return 0; } Py_DECREF(pystring); *(buffer_get_buffer(buffer) + type_byte) = 0x07; return 1; } case 11: { /* Regex */ 
return _write_regex_to_buffer(buffer, type_byte, value); } case 13: { /* Code */ int start_position, length_location, length; PyObject* scope = PyObject_GetAttrString(value, "scope"); if (!scope) { return 0; } if (!PyDict_Size(scope)) { Py_DECREF(scope); *(buffer_get_buffer(buffer) + type_byte) = 0x0D; return write_string(buffer, value); } *(buffer_get_buffer(buffer) + type_byte) = 0x0F; start_position = buffer_get_position(buffer); /* save space for length */ length_location = buffer_save_space(buffer, 4); if (length_location == -1) { PyErr_NoMemory(); Py_DECREF(scope); return 0; } if (!write_string(buffer, value)) { Py_DECREF(scope); return 0; } if (!write_dict(self, buffer, scope, 0, uuid_subtype, 0)) { Py_DECREF(scope); return 0; } Py_DECREF(scope); length = buffer_get_position(buffer) - start_position; memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); return 1; } case 17: { /* Timestamp */ PyObject* obj; long i; obj = PyObject_GetAttrString(value, "inc"); if (!obj) { return 0; } #if PY_MAJOR_VERSION >= 3 i = PyLong_AsLong(obj); #else i = PyInt_AsLong(obj); #endif Py_DECREF(obj); if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { return 0; } obj = PyObject_GetAttrString(value, "time"); if (!obj) { return 0; } #if PY_MAJOR_VERSION >= 3 i = PyLong_AsLong(obj); #else i = PyInt_AsLong(obj); #endif Py_DECREF(obj); if (!buffer_write_bytes(buffer, (const char*)&i, 4)) { return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x11; return 1; } case 100: { /* DBRef */ PyObject* as_doc = PyObject_CallMethod(value, "as_doc", NULL); if (!as_doc) { return 0; } if (!write_dict(self, buffer, as_doc, 0, uuid_subtype, 0)) { Py_DECREF(as_doc); return 0; } Py_DECREF(as_doc); *(buffer_get_buffer(buffer) + type_byte) = 0x03; return 1; } case 255: { /* MinKey */ *(buffer_get_buffer(buffer) + type_byte) = 0xFF; return 1; } case 127: { /* MaxKey */ *(buffer_get_buffer(buffer) + type_byte) = 0x7F; return 1; } } } /* No _type_marker attibute or not one of our types. 
*/ if (PyBool_Check(value)) { #if PY_MAJOR_VERSION >= 3 const long bool = PyLong_AsLong(value); #else const long bool = PyInt_AsLong(value); #endif const char c = bool ? 0x01 : 0x00; *(buffer_get_buffer(buffer) + type_byte) = 0x08; return buffer_write_bytes(buffer, &c, 1); } #if PY_MAJOR_VERSION >= 3 else if (PyLong_Check(value)) { const long long_value = PyLong_AsLong(value); #else else if (PyInt_Check(value)) { const long long_value = PyInt_AsLong(value); #endif const int int_value = (int)long_value; if (PyErr_Occurred() || long_value != int_value) { /* Overflow */ long long long_long_value; PyErr_Clear(); long_long_value = PyLong_AsLongLong(value); if (PyErr_Occurred()) { /* Overflow AGAIN */ PyErr_SetString(PyExc_OverflowError, "MongoDB can only handle up to 8-byte ints"); return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x12; return buffer_write_bytes(buffer, (const char*)&long_long_value, 8); } *(buffer_get_buffer(buffer) + type_byte) = 0x10; return buffer_write_bytes(buffer, (const char*)&int_value, 4); #if PY_MAJOR_VERSION < 3 } else if (PyLong_Check(value)) { const long long long_long_value = PyLong_AsLongLong(value); if (PyErr_Occurred()) { /* Overflow */ PyErr_SetString(PyExc_OverflowError, "MongoDB can only handle up to 8-byte ints"); return 0; } *(buffer_get_buffer(buffer) + type_byte) = 0x12; return buffer_write_bytes(buffer, (const char*)&long_long_value, 8); #endif } else if (PyFloat_Check(value)) { const double d = PyFloat_AsDouble(value); *(buffer_get_buffer(buffer) + type_byte) = 0x01; return buffer_write_bytes(buffer, (const char*)&d, 8); } else if (value == Py_None) { *(buffer_get_buffer(buffer) + type_byte) = 0x0A; return 1; } else if (PyDict_Check(value)) { *(buffer_get_buffer(buffer) + type_byte) = 0x03; return write_dict(self, buffer, value, check_keys, uuid_subtype, 0); } else if (PyList_Check(value) || PyTuple_Check(value)) { Py_ssize_t items, i; int start_position, length_location, length; char zero = 0; *(buffer_get_buffer(buffer) 
+ type_byte) = 0x04; start_position = buffer_get_position(buffer); /* save space for length */ length_location = buffer_save_space(buffer, 4); if (length_location == -1) { PyErr_NoMemory(); return 0; } if ((items = PySequence_Size(value)) > BSON_MAX_SIZE) { PyObject* BSONError = _error("BSONError"); if (BSONError) { PyErr_SetString(BSONError, "Too many items to serialize."); Py_DECREF(BSONError); } return 0; } for(i = 0; i < items; i++) { int list_type_byte = buffer_save_space(buffer, 1); char name[16]; PyObject* item_value; if (list_type_byte == -1) { PyErr_NoMemory(); return 0; } INT2STRING(name, (int)i); if (!buffer_write_bytes(buffer, name, (int)strlen(name) + 1)) { return 0; } if (!(item_value = PySequence_GetItem(value, i))) return 0; if (!write_element_to_buffer(self, buffer, list_type_byte, item_value, check_keys, uuid_subtype)) { Py_DECREF(item_value); return 0; } Py_DECREF(item_value); } /* write null byte and fill in length */ if (!buffer_write_bytes(buffer, &zero, 1)) { return 0; } length = buffer_get_position(buffer) - start_position; memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); return 1; #if PY_MAJOR_VERSION >= 3 /* Python3 special case. Store bytes as BSON binary subtype 0. */ } else if (PyBytes_Check(value)) { int subtype = 0; int size; const char* data = PyBytes_AS_STRING(value); if (!data) return 0; if ((size = _downcast_and_check(PyBytes_GET_SIZE(value), 0)) == -1) return 0; *(buffer_get_buffer(buffer) + type_byte) = 0x05; if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { return 0; } if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { return 0; } if (!buffer_write_bytes(buffer, data, size)) { return 0; } return 1; #else /* PyString_Check only works in Python 2.x. 
*/ } else if (PyString_Check(value)) { result_t status; const char* data; int size; if (!(data = PyString_AS_STRING(value))) return 0; if ((size = _downcast_and_check(PyString_GET_SIZE(value), 1)) == -1) return 0; *(buffer_get_buffer(buffer) + type_byte) = 0x02; status = check_string((const unsigned char*)data, size - 1, 1, 0); if (status == NOT_UTF_8) { PyObject* InvalidStringData = _error("InvalidStringData"); if (InvalidStringData) { PyObject* repr = PyObject_Repr(value); char* repr_as_cstr = repr ? PyString_AsString(repr) : NULL; if (repr_as_cstr) { PyObject *message = PyString_FromFormat( "strings in documents must be valid UTF-8: %s", repr_as_cstr); if (message) { PyErr_SetObject(InvalidStringData, message); Py_DECREF(message); } } else { /* repr(value) failed, use a generic message. */ PyErr_SetString( InvalidStringData, "strings in documents must be valid UTF-8"); } Py_XDECREF(repr); Py_DECREF(InvalidStringData); } return 0; } if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { return 0; } if (!buffer_write_bytes(buffer, data, size)) { return 0; } return 1; #endif } else if (PyUnicode_Check(value)) { *(buffer_get_buffer(buffer) + type_byte) = 0x02; return write_unicode(buffer, value); } else if (PyDateTime_Check(value)) { long long millis; PyObject* utcoffset = PyObject_CallMethod(value, "utcoffset", NULL); if (utcoffset == NULL) return 0; if (utcoffset != Py_None) { PyObject* result = PyNumber_Subtract(value, utcoffset); Py_DECREF(utcoffset); if (!result) { return 0; } millis = millis_from_datetime(result); Py_DECREF(result); } else { millis = millis_from_datetime(value); } *(buffer_get_buffer(buffer) + type_byte) = 0x09; return buffer_write_bytes(buffer, (const char*)&millis, 8); } else if (PyObject_TypeCheck(value, state->REType)) { return _write_regex_to_buffer(buffer, type_byte, value); } /* * Try UUID last since we have to import * it if we're in a sub-interpreter. * * If we're running under python 2.4 there likely * isn't a uuid module. 
*/ if (state->UUID) { PyObject* uuid_type = _get_object(state->UUID, "uuid", "UUID"); if (uuid_type && PyObject_IsInstance(value, uuid_type)) { /* Just a special case of Binary above, but * simpler to do as a separate case. */ PyObject* bytes; /* Could be bytes, bytearray, str... */ const char* data; /* UUID is always 16 bytes */ int size = 16; int subtype; Py_DECREF(uuid_type); if (uuid_subtype == JAVA_LEGACY || uuid_subtype == CSHARP_LEGACY) { subtype = 3; } else { subtype = uuid_subtype; } *(buffer_get_buffer(buffer) + type_byte) = 0x05; if (!buffer_write_bytes(buffer, (const char*)&size, 4)) { return 0; } if (!buffer_write_bytes(buffer, (const char*)&subtype, 1)) { return 0; } if (uuid_subtype == CSHARP_LEGACY) { /* Legacy C# byte order */ bytes = PyObject_GetAttrString(value, "bytes_le"); } else { bytes = PyObject_GetAttrString(value, "bytes"); } if (!bytes) { return 0; } #if PY_MAJOR_VERSION >= 3 /* Work around http://bugs.python.org/issue7380 */ if (PyByteArray_Check(bytes)) { data = PyByteArray_AsString(bytes); } else { data = PyBytes_AsString(bytes); } #else data = PyString_AsString(bytes); #endif if (data == NULL) { Py_DECREF(bytes); return 0; } if (uuid_subtype == JAVA_LEGACY) { /* Store in legacy java byte order. */ char as_legacy_java[16]; _fix_java(data, as_legacy_java); if (!buffer_write_bytes(buffer, as_legacy_java, size)) { Py_DECREF(bytes); return 0; } } else { if (!buffer_write_bytes(buffer, data, size)) { Py_DECREF(bytes); return 0; } } Py_DECREF(bytes); return 1; } else { Py_XDECREF(uuid_type); } } /* We can't determine value's type. Fail. 
*/ _set_cannot_encode(value); return 0; } static int check_key_name(const char* name, int name_length) { if (name_length > 0 && name[0] == '$') { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { #if PY_MAJOR_VERSION >= 3 PyObject* errmsg = PyUnicode_FromFormat( "key '%s' must not start with '$'", name); #else PyObject* errmsg = PyString_FromFormat( "key '%s' must not start with '$'", name); #endif if (errmsg) { PyErr_SetObject(InvalidDocument, errmsg); Py_DECREF(errmsg); } Py_DECREF(InvalidDocument); } return 0; } if (strchr(name, '.')) { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { #if PY_MAJOR_VERSION >= 3 PyObject* errmsg = PyUnicode_FromFormat( "key '%s' must not contain '.'", name); #else PyObject* errmsg = PyString_FromFormat( "key '%s' must not contain '.'", name); #endif if (errmsg) { PyErr_SetObject(InvalidDocument, errmsg); Py_DECREF(errmsg); } Py_DECREF(InvalidDocument); } return 0; } return 1; } /* Write a (key, value) pair to the buffer. * * Returns 0 on failure */ int write_pair(PyObject* self, buffer_t buffer, const char* name, int name_length, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char allow_id) { int type_byte; /* Don't write any _id elements unless we're explicitly told to - * _id has to be written first so we do so, but don't bother * deleting it from the dictionary being written. 
*/ if (!allow_id && strcmp(name, "_id") == 0) { return 1; } type_byte = buffer_save_space(buffer, 1); if (type_byte == -1) { PyErr_NoMemory(); return 0; } if (check_keys && !check_key_name(name, name_length)) { return 0; } if (!buffer_write_bytes(buffer, name, name_length + 1)) { return 0; } if (!write_element_to_buffer(self, buffer, type_byte, value, check_keys, uuid_subtype)) { return 0; } return 1; } int decode_and_write_pair(PyObject* self, buffer_t buffer, PyObject* key, PyObject* value, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) { PyObject* encoded; const char* data; int size; if (PyUnicode_Check(key)) { encoded = PyUnicode_AsUTF8String(key); if (!encoded) { return 0; } #if PY_MAJOR_VERSION >= 3 if (!(data = PyBytes_AS_STRING(encoded))) { Py_DECREF(encoded); return 0; } if ((size = _downcast_and_check(PyBytes_GET_SIZE(encoded), 1)) == -1) { Py_DECREF(encoded); return 0; } #else if (!(data = PyString_AS_STRING(encoded))) { Py_DECREF(encoded); return 0; } if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) { Py_DECREF(encoded); return 0; } #endif if (strlen(data) != (size_t)(size - 1)) { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyErr_SetString(InvalidDocument, "Key names must not contain the NULL byte"); Py_DECREF(InvalidDocument); } Py_DECREF(encoded); return 0; } #if PY_MAJOR_VERSION < 3 } else if (PyString_Check(key)) { result_t status; encoded = key; Py_INCREF(encoded); if (!(data = PyString_AS_STRING(encoded))) { Py_DECREF(encoded); return 0; } if ((size = _downcast_and_check(PyString_GET_SIZE(encoded), 1)) == -1) { Py_DECREF(encoded); return 0; } status = check_string((const unsigned char*)data, size - 1, 1, 1); if (status == NOT_UTF_8) { PyObject* InvalidStringData = _error("InvalidStringData"); if (InvalidStringData) { PyErr_SetString(InvalidStringData, "strings in documents must be valid UTF-8"); Py_DECREF(InvalidStringData); } Py_DECREF(encoded); return 0; } 
else if (status == HAS_NULL) { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyErr_SetString(InvalidDocument, "Key names must not contain the NULL byte"); Py_DECREF(InvalidDocument); } Py_DECREF(encoded); return 0; } #endif } else { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyObject* repr = PyObject_Repr(key); if (repr) { #if PY_MAJOR_VERSION >= 3 PyObject* errmsg = PyUnicode_FromString( "documents must have only string keys, key was "); #else PyObject* errmsg = PyString_FromString( "documents must have only string keys, key was "); #endif if (errmsg) { #if PY_MAJOR_VERSION >= 3 PyObject* error = PyUnicode_Concat(errmsg, repr); if (error) { PyErr_SetObject(InvalidDocument, error); Py_DECREF(error); } Py_DECREF(errmsg); Py_DECREF(repr); #else PyString_ConcatAndDel(&errmsg, repr); if (errmsg) { PyErr_SetObject(InvalidDocument, errmsg); Py_DECREF(errmsg); } #endif } else { Py_DECREF(repr); } } Py_DECREF(InvalidDocument); } return 0; } /* If top_level is True, don't allow writing _id here - it was already written. 
*/ if (!write_pair(self, buffer, data, size - 1, value, check_keys, uuid_subtype, !top_level)) { Py_DECREF(encoded); return 0; } Py_DECREF(encoded); return 1; } /* returns 0 on failure */ int write_dict(PyObject* self, buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) { PyObject* key; PyObject* iter; char zero = 0; int length; int length_location; if (!PyDict_Check(dict)) { PyObject* repr = PyObject_Repr(dict); if (repr) { #if PY_MAJOR_VERSION >= 3 PyObject* errmsg = PyUnicode_FromString( "encoder expected a mapping type but got: "); if (errmsg) { PyObject* error = PyUnicode_Concat(errmsg, repr); if (error) { PyErr_SetObject(PyExc_TypeError, error); Py_DECREF(error); } Py_DECREF(errmsg); Py_DECREF(repr); } #else PyObject* errmsg = PyString_FromString( "encoder expected a mapping type but got: "); if (errmsg) { PyString_ConcatAndDel(&errmsg, repr); if (errmsg) { PyErr_SetObject(PyExc_TypeError, errmsg); Py_DECREF(errmsg); } } #endif else { Py_DECREF(repr); } } else { PyErr_SetString(PyExc_TypeError, "encoder expected a mapping type"); } return 0; } length_location = buffer_save_space(buffer, 4); if (length_location == -1) { PyErr_NoMemory(); return 0; } /* Write _id first if this is a top level doc. 
*/ if (top_level) { PyObject* _id = PyDict_GetItemString(dict, "_id"); if (_id) { if (!write_pair(self, buffer, "_id", 3, _id, check_keys, uuid_subtype, 1)) { return 0; } } } iter = PyObject_GetIter(dict); if (iter == NULL) { return 0; } while ((key = PyIter_Next(iter)) != NULL) { PyObject* value = PyDict_GetItem(dict, key); if (!value) { PyErr_SetObject(PyExc_KeyError, key); Py_DECREF(key); Py_DECREF(iter); return 0; } if (!decode_and_write_pair(self, buffer, key, value, check_keys, uuid_subtype, top_level)) { Py_DECREF(key); Py_DECREF(iter); return 0; } Py_DECREF(key); } Py_DECREF(iter); /* write null byte and fill in length */ if (!buffer_write_bytes(buffer, &zero, 1)) { return 0; } length = buffer_get_position(buffer) - length_location; memcpy(buffer_get_buffer(buffer) + length_location, &length, 4); return 1; } static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) { PyObject* dict; PyObject* result; unsigned char check_keys; unsigned char uuid_subtype; unsigned char top_level = 1; buffer_t buffer; if (!PyArg_ParseTuple(args, "Obb|b", &dict, &check_keys, &uuid_subtype, &top_level)) { return NULL; } buffer = buffer_new(); if (!buffer) { PyErr_NoMemory(); return NULL; } if (!write_dict(self, buffer, dict, check_keys, uuid_subtype, top_level)) { buffer_free(buffer); return NULL; } /* objectify buffer */ #if PY_MAJOR_VERSION >= 3 result = Py_BuildValue("y#", buffer_get_buffer(buffer), buffer_get_position(buffer)); #else result = Py_BuildValue("s#", buffer_get_buffer(buffer), buffer_get_position(buffer)); #endif buffer_free(buffer); return result; } static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position, unsigned char type, unsigned max, PyObject* as_class, unsigned char tz_aware, unsigned char uuid_subtype, unsigned char compile_re) { struct module_state *state = GETSTATE(self); PyObject* value = NULL; switch (type) { case 1: { double d; if (max < 8) { goto invalid; } memcpy(&d, buffer + *position, 8); value = 
PyFloat_FromDouble(d); *position += 8; break; } case 2: case 14: { unsigned value_length; if (max < 4) { goto invalid; } memcpy(&value_length, buffer + *position, 4); /* Encoded string length + string */ if (!value_length || max < value_length || max < 4 + value_length) { goto invalid; } *position += 4; /* Strings must end in \0 */ if (buffer[*position + value_length - 1]) { goto invalid; } value = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict"); if (!value) { goto invalid; } *position += value_length; break; } case 3: { PyObject* collection; unsigned size; if (max < 4) { goto invalid; } memcpy(&size, buffer + *position, 4); if (size < BSON_MIN_SIZE || max < size) { goto invalid; } /* Check for bad eoo */ if (buffer[*position + size - 1]) { goto invalid; } value = elements_to_dict(self, buffer + *position + 4, size - 5, as_class, tz_aware, uuid_subtype, compile_re); if (!value) { goto invalid; } /* Decoding for DBRefs */ collection = PyDict_GetItemString(value, "$ref"); if (collection) { /* DBRef */ PyObject* dbref = NULL; PyObject* dbref_type; PyObject* id; PyObject* database; Py_INCREF(collection); PyDict_DelItemString(value, "$ref"); id = PyDict_GetItemString(value, "$id"); if (id == NULL) { id = Py_None; Py_INCREF(id); } else { Py_INCREF(id); PyDict_DelItemString(value, "$id"); } database = PyDict_GetItemString(value, "$db"); if (database == NULL) { database = Py_None; Py_INCREF(database); } else { Py_INCREF(database); PyDict_DelItemString(value, "$db"); } if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) { dbref = PyObject_CallFunctionObjArgs(dbref_type, collection, id, database, value, NULL); Py_DECREF(dbref_type); } Py_DECREF(value); value = dbref; Py_DECREF(id); Py_DECREF(collection); Py_DECREF(database); } *position += size; break; } case 4: { unsigned size, end; if (max < 4) { goto invalid; } memcpy(&size, buffer + *position, 4); if (size < BSON_MIN_SIZE || max < size) { goto invalid; } end = *position + size - 1; 
/* Check for bad eoo */ if (buffer[end]) { goto invalid; } *position += 4; value = PyList_New(0); if (!value) { goto invalid; } while (*position < end) { PyObject* to_append; unsigned char bson_type = (unsigned char)buffer[(*position)++]; size_t key_size = strlen(buffer + *position); if (max < key_size) { Py_DECREF(value); goto invalid; } /* just skip the key, they're in order. */ *position += (unsigned)key_size + 1; if (Py_EnterRecursiveCall(" while decoding a list value")) { Py_DECREF(value); goto invalid; } to_append = get_value(self, buffer, position, bson_type, max - (unsigned)key_size, as_class, tz_aware, uuid_subtype, compile_re); Py_LeaveRecursiveCall(); if (!to_append) { Py_DECREF(value); goto invalid; } PyList_Append(value, to_append); Py_DECREF(to_append); } (*position)++; break; } case 5: { PyObject* data; PyObject* st; PyObject* type_to_create; unsigned length; unsigned char subtype; if (max < 5) { goto invalid; } memcpy(&length, buffer + *position, 4); if (max < length) { goto invalid; } subtype = (unsigned char)buffer[*position + 4]; *position += 5; if (subtype == 2 && length < 4) { goto invalid; } #if PY_MAJOR_VERSION >= 3 /* Python3 special case. Decode BSON binary subtype 0 to bytes. 
*/ if (subtype == 0) { value = PyBytes_FromStringAndSize(buffer + *position, length); *position += length; break; } if (subtype == 2) { data = PyBytes_FromStringAndSize(buffer + *position + 4, length - 4); } else { data = PyBytes_FromStringAndSize(buffer + *position, length); } #else if (subtype == 2) { data = PyString_FromStringAndSize(buffer + *position + 4, length - 4); } else { data = PyString_FromStringAndSize(buffer + *position, length); } #endif if (!data) { goto invalid; } /* Encode as UUID, not Binary */ if ((subtype == 3 || subtype == 4) && state->UUID) { PyObject* kwargs; PyObject* args = PyTuple_New(0); /* UUID should always be 16 bytes */ if (!args || length != 16) { Py_DECREF(data); goto invalid; } kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(data); Py_DECREF(args); goto invalid; } /* * From this point, we hold refs to args, kwargs, and data. * If anything fails, goto uuiderror to clean them up. */ if (uuid_subtype == CSHARP_LEGACY) { /* Legacy C# byte order */ if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1) goto uuiderror; } else { if (uuid_subtype == JAVA_LEGACY) { /* Convert from legacy java byte order */ char big_endian[16]; _fix_java(buffer + *position, big_endian); /* Free the previously created PyString object */ Py_DECREF(data); #if PY_MAJOR_VERSION >= 3 data = PyBytes_FromStringAndSize(big_endian, length); #else data = PyString_FromStringAndSize(big_endian, length); #endif if (data == NULL) goto uuiderror; } if ((PyDict_SetItemString(kwargs, "bytes", data)) == -1) goto uuiderror; } if ((type_to_create = _get_object(state->UUID, "uuid", "UUID"))) { value = PyObject_Call(type_to_create, args, kwargs); Py_DECREF(type_to_create); } Py_DECREF(args); Py_DECREF(kwargs); Py_DECREF(data); if (!value) { goto invalid; } *position += length; break; uuiderror: Py_DECREF(args); Py_DECREF(kwargs); Py_XDECREF(data); goto invalid; } #if PY_MAJOR_VERSION >= 3 st = PyLong_FromLong(subtype); #else st = PyInt_FromLong(subtype); #endif if (!st) { 
Py_DECREF(data); goto invalid; } if ((type_to_create = _get_object(state->Binary, "bson.binary", "Binary"))) { value = PyObject_CallFunctionObjArgs(type_to_create, data, st, NULL); Py_DECREF(type_to_create); } Py_DECREF(st); Py_DECREF(data); if (!value) { goto invalid; } *position += length; break; } case 6: case 10: { value = Py_None; Py_INCREF(value); break; } case 7: { PyObject* objectid_type; if (max < 12) { goto invalid; } if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { #if PY_MAJOR_VERSION >= 3 value = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); #else value = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); #endif Py_DECREF(objectid_type); } *position += 12; break; } case 8: { value = buffer[(*position)++] ? Py_True : Py_False; Py_INCREF(value); break; } case 9: { PyObject* utc_type; PyObject* naive; PyObject* replace; PyObject* args; PyObject* kwargs; long long millis; if (max < 8) { goto invalid; } memcpy(&millis, buffer + *position, 8); naive = datetime_from_millis(millis); *position += 8; if (!tz_aware) { /* In the naive case, we're done here. 
*/ value = naive; break; } if (!naive) { goto invalid; } replace = PyObject_GetAttrString(naive, "replace"); Py_DECREF(naive); if (!replace) { goto invalid; } args = PyTuple_New(0); if (!args) { Py_DECREF(replace); goto invalid; } kwargs = PyDict_New(); if (!kwargs) { Py_DECREF(replace); Py_DECREF(args); goto invalid; } utc_type = _get_object(state->UTC, "bson.tz_util", "UTC"); if (!utc_type || PyDict_SetItemString(kwargs, "tzinfo", utc_type) == -1) { Py_DECREF(replace); Py_DECREF(args); Py_DECREF(kwargs); Py_XDECREF(utc_type); goto invalid; } Py_XDECREF(utc_type); value = PyObject_Call(replace, args, kwargs); Py_DECREF(replace); Py_DECREF(args); Py_DECREF(kwargs); break; } case 11: { PyObject* compile_func; PyObject* pattern; int flags; size_t flags_length, i; size_t pattern_length = strlen(buffer + *position); if (pattern_length > BSON_MAX_SIZE || max < pattern_length) { goto invalid; } pattern = PyUnicode_DecodeUTF8(buffer + *position, pattern_length, "strict"); if (!pattern) { goto invalid; } *position += (unsigned)pattern_length + 1; flags_length = strlen(buffer + *position); if (flags_length > BSON_MAX_SIZE || (BSON_MAX_SIZE - pattern_length) < flags_length) { Py_DECREF(pattern); goto invalid; } if (max < pattern_length + flags_length) { Py_DECREF(pattern); goto invalid; } flags = 0; for (i = 0; i < flags_length; i++) { if (buffer[*position + i] == 'i') { flags |= 2; } else if (buffer[*position + i] == 'l') { flags |= 4; } else if (buffer[*position + i] == 'm') { flags |= 8; } else if (buffer[*position + i] == 's') { flags |= 16; } else if (buffer[*position + i] == 'u') { flags |= 32; } else if (buffer[*position + i] == 'x') { flags |= 64; } } *position += (unsigned)flags_length + 1; /* * Use re.compile() if we're configured to compile regular * expressions, else create an instance of our Regex class. 
*/ if (compile_re) { compile_func = _get_object(state->RECompile, "re", "compile"); } else { compile_func = _get_object(state->Regex, "bson.regex", "Regex"); } if (compile_func) { value = PyObject_CallFunction(compile_func, "Oi", pattern, flags); Py_DECREF(compile_func); } Py_DECREF(pattern); break; } case 12: { unsigned coll_length; PyObject* collection; PyObject* id = NULL; PyObject* objectid_type; PyObject* dbref_type; if (max < 4) { goto invalid; } memcpy(&coll_length, buffer + *position, 4); /* Encoded string length + string + 12 byte ObjectId */ if (!coll_length || max < coll_length || max < 4 + coll_length + 12) { goto invalid; } *position += 4; /* Strings must end in \0 */ if (buffer[*position + coll_length - 1]) { goto invalid; } collection = PyUnicode_DecodeUTF8(buffer + *position, coll_length - 1, "strict"); if (!collection) { goto invalid; } *position += coll_length; if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) { #if PY_MAJOR_VERSION >= 3 id = PyObject_CallFunction(objectid_type, "y#", buffer + *position, 12); #else id = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12); #endif Py_DECREF(objectid_type); } if (!id) { Py_DECREF(collection); goto invalid; } *position += 12; if ((dbref_type = _get_object(state->DBRef, "bson.dbref", "DBRef"))) { value = PyObject_CallFunctionObjArgs(dbref_type, collection, id, NULL); Py_DECREF(dbref_type); } Py_DECREF(collection); Py_DECREF(id); break; } case 13: { PyObject* code; PyObject* code_type; unsigned value_length; if (max < 4) { goto invalid; } memcpy(&value_length, buffer + *position, 4); /* Encoded string length + string */ if (!value_length || max < value_length || max < 4 + value_length) { goto invalid; } *position += 4; /* Strings must end in \0 */ if (buffer[*position + value_length - 1]) { goto invalid; } code = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict"); if (!code) { goto invalid; } *position += value_length; if ((code_type = 
_get_object(state->Code, "bson.code", "Code"))) { value = PyObject_CallFunctionObjArgs(code_type, code, NULL, NULL); Py_DECREF(code_type); } Py_DECREF(code); break; } case 15: { unsigned c_w_s_size; unsigned code_size; unsigned scope_size; PyObject* code; PyObject* scope; PyObject* code_type; if (max < 8) { goto invalid; } memcpy(&c_w_s_size, buffer + *position, 4); *position += 4; if (max < c_w_s_size) { goto invalid; } memcpy(&code_size, buffer + *position, 4); /* code_w_scope length + code length + code + scope length */ if (!code_size || max < code_size || max < 4 + 4 + code_size + 4) { goto invalid; } *position += 4; /* Strings must end in \0 */ if (buffer[*position + code_size - 1]) { goto invalid; } code = PyUnicode_DecodeUTF8(buffer + *position, code_size - 1, "strict"); if (!code) { goto invalid; } *position += code_size; memcpy(&scope_size, buffer + *position, 4); if (scope_size < BSON_MIN_SIZE) { Py_DECREF(code); goto invalid; } /* code length + code + scope length + scope */ if ((4 + code_size + 4 + scope_size) != c_w_s_size) { Py_DECREF(code); goto invalid; } /* Check for bad eoo */ if (buffer[*position + scope_size - 1]) { goto invalid; } scope = elements_to_dict(self, buffer + *position + 4, scope_size - 5, (PyObject*)&PyDict_Type, tz_aware, uuid_subtype, compile_re); if (!scope) { Py_DECREF(code); goto invalid; } *position += scope_size; if ((code_type = _get_object(state->Code, "bson.code", "Code"))) { value = PyObject_CallFunctionObjArgs(code_type, code, scope, NULL); Py_DECREF(code_type); } Py_DECREF(code); Py_DECREF(scope); break; } case 16: { int i; if (max < 4) { goto invalid; } memcpy(&i, buffer + *position, 4); #if PY_MAJOR_VERSION >= 3 value = PyLong_FromLong(i); #else value = PyInt_FromLong(i); #endif if (!value) { goto invalid; } *position += 4; break; } case 17: { unsigned int time, inc; PyObject* timestamp_type; if (max < 8) { goto invalid; } memcpy(&inc, buffer + *position, 4); memcpy(&time, buffer + *position + 4, 4); if 
((timestamp_type = _get_object(state->Timestamp, "bson.timestamp", "Timestamp"))) { value = PyObject_CallFunction(timestamp_type, "II", time, inc); Py_DECREF(timestamp_type); } *position += 8; break; } case 18: { long long ll; if (max < 8) { goto invalid; } memcpy(&ll, buffer + *position, 8); value = PyLong_FromLongLong(ll); if (!value) { goto invalid; } *position += 8; break; } case 255: { PyObject* minkey_type = _get_object(state->MinKey, "bson.min_key", "MinKey"); if (!minkey_type) goto invalid; value = PyObject_CallFunctionObjArgs(minkey_type, NULL); Py_DECREF(minkey_type); break; } case 127: { PyObject* maxkey_type = _get_object(state->MaxKey, "bson.max_key", "MaxKey"); if (!maxkey_type) goto invalid; value = PyObject_CallFunctionObjArgs(maxkey_type, NULL); Py_DECREF(maxkey_type); break; } default: { PyObject* InvalidDocument = _error("InvalidDocument"); if (InvalidDocument) { PyErr_SetString(InvalidDocument, "no c decoder for this type yet"); Py_DECREF(InvalidDocument); } goto invalid; } } if (value) { return value; } invalid: /* * Wrap any non-InvalidBSON errors in InvalidBSON. */ if (PyErr_Occurred()) { PyObject *etype, *evalue, *etrace; PyObject *InvalidBSON; /* * Calling _error clears the error state, so fetch it first. */ PyErr_Fetch(&etype, &evalue, &etrace); InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { if (!PyErr_GivenExceptionMatches(etype, InvalidBSON)) { /* * Raise InvalidBSON(str(e)). */ Py_DECREF(etype); etype = InvalidBSON; if (evalue) { PyObject *msg = PyObject_Str(evalue); Py_DECREF(evalue); evalue = msg; } PyErr_NormalizeException(&etype, &evalue, &etrace); } else { /* * The current exception matches InvalidBSON, so we don't need * this reference after all. */ Py_DECREF(InvalidBSON); } } /* Steals references to args. 
*/ PyErr_Restore(etype, evalue, etrace); } else { PyObject *InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "invalid length or type code"); Py_DECREF(InvalidBSON); } } return NULL; } static PyObject* _elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, unsigned char uuid_subtype, unsigned char compile_re) { unsigned position = 0; PyObject* dict = PyObject_CallObject(as_class, NULL); if (!dict) { return NULL; } while (position < max) { PyObject* name; PyObject* value; unsigned char type = (unsigned char)string[position++]; size_t name_length = strlen(string + position); if (name_length > BSON_MAX_SIZE || position + name_length >= max) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetNone(InvalidBSON); Py_DECREF(InvalidBSON); } Py_DECREF(dict); return NULL; } name = PyUnicode_DecodeUTF8(string + position, name_length, "strict"); if (!name) { Py_DECREF(dict); return NULL; } position += (unsigned)name_length + 1; value = get_value(self, string, &position, type, max - position, as_class, tz_aware, uuid_subtype, compile_re); if (!value) { Py_DECREF(name); Py_DECREF(dict); return NULL; } PyObject_SetItem(dict, name, value); Py_DECREF(name); Py_DECREF(value); } return dict; } static PyObject* elements_to_dict(PyObject* self, const char* string, unsigned max, PyObject* as_class, unsigned char tz_aware, unsigned char uuid_subtype, unsigned char compile_re) { PyObject* result; if (Py_EnterRecursiveCall(" while decoding a BSON document")) return NULL; result = _elements_to_dict(self, string, max, as_class, tz_aware, uuid_subtype, compile_re); Py_LeaveRecursiveCall(); return result; } static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) { int size; Py_ssize_t total_size; const char* string; PyObject* bson; PyObject* as_class; unsigned char tz_aware; unsigned char uuid_subtype; unsigned char compile_re; PyObject* dict; PyObject* remainder; 
PyObject* result; if (!PyArg_ParseTuple( args, "OObbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { return NULL; } #if PY_MAJOR_VERSION >= 3 if (!PyBytes_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a bytes object"); #else if (!PyString_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to _bson_to_dict must be a string"); #endif return NULL; } #if PY_MAJOR_VERSION >= 3 total_size = PyBytes_Size(bson); #else total_size = PyString_Size(bson); #endif if (total_size < BSON_MIN_SIZE) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } return NULL; } #if PY_MAJOR_VERSION >= 3 string = PyBytes_AsString(bson); #else string = PyString_AsString(bson); #endif if (!string) { return NULL; } memcpy(&size, string, 4); if (size < BSON_MIN_SIZE) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } return NULL; } if (total_size < size || total_size > BSON_MAX_SIZE) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } return NULL; } if (size != total_size || string[size - 1]) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } return NULL; } dict = elements_to_dict(self, string + 4, (unsigned)size - 5, as_class, tz_aware, uuid_subtype, compile_re); if (!dict) { return NULL; } #if PY_MAJOR_VERSION >= 3 remainder = PyBytes_FromStringAndSize(string + size, total_size - size); #else remainder = PyString_FromStringAndSize(string + size, total_size - size); #endif if (!remainder) { Py_DECREF(dict); return NULL; } result = Py_BuildValue("OO", dict, remainder); Py_DECREF(dict); Py_DECREF(remainder); return result; } static PyObject* 
_cbson_decode_all(PyObject* self, PyObject* args) { int size; Py_ssize_t total_size; const char* string; PyObject* bson; PyObject* dict; PyObject* result; PyObject* as_class = (PyObject*)&PyDict_Type; unsigned char tz_aware = 1; unsigned char uuid_subtype = 3; unsigned char compile_re = 1; if (!PyArg_ParseTuple( args, "O|Obbb", &bson, &as_class, &tz_aware, &uuid_subtype, &compile_re)) { return NULL; } #if PY_MAJOR_VERSION >= 3 if (!PyBytes_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a bytes object"); #else if (!PyString_Check(bson)) { PyErr_SetString(PyExc_TypeError, "argument to decode_all must be a string"); #endif return NULL; } #if PY_MAJOR_VERSION >= 3 total_size = PyBytes_Size(bson); string = PyBytes_AsString(bson); #else total_size = PyString_Size(bson); string = PyString_AsString(bson); #endif if (!string) { return NULL; } if (!(result = PyList_New(0))) return NULL; while (total_size > 0) { if (total_size < BSON_MIN_SIZE) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "not enough data for a BSON document"); Py_DECREF(InvalidBSON); } Py_DECREF(result); return NULL; } memcpy(&size, string, 4); if (size < BSON_MIN_SIZE) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "invalid message size"); Py_DECREF(InvalidBSON); } Py_DECREF(result); return NULL; } if (total_size < size) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "objsize too large"); Py_DECREF(InvalidBSON); } Py_DECREF(result); return NULL; } if (string[size - 1]) { PyObject* InvalidBSON = _error("InvalidBSON"); if (InvalidBSON) { PyErr_SetString(InvalidBSON, "bad eoo"); Py_DECREF(InvalidBSON); } Py_DECREF(result); return NULL; } dict = elements_to_dict(self, string + 4, (unsigned)size - 5, as_class, tz_aware, uuid_subtype, compile_re); if (!dict) { Py_DECREF(result); return NULL; } PyList_Append(result, dict); 
Py_DECREF(dict); string += size; total_size -= size; } return result; } static PyMethodDef _CBSONMethods[] = { {"_dict_to_bson", _cbson_dict_to_bson, METH_VARARGS, "convert a dictionary to a string containing its BSON representation."}, {"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS, "convert a BSON string to a SON object."}, {"decode_all", _cbson_decode_all, METH_VARARGS, "convert binary data to a sequence of documents."}, {NULL, NULL, 0, NULL} }; #if PY_MAJOR_VERSION >= 3 #define INITERROR return NULL static int _cbson_traverse(PyObject *m, visitproc visit, void *arg) { Py_VISIT(GETSTATE(m)->Binary); Py_VISIT(GETSTATE(m)->Code); Py_VISIT(GETSTATE(m)->ObjectId); Py_VISIT(GETSTATE(m)->DBRef); Py_VISIT(GETSTATE(m)->RECompile); Py_VISIT(GETSTATE(m)->Regex); Py_VISIT(GETSTATE(m)->UUID); Py_VISIT(GETSTATE(m)->Timestamp); Py_VISIT(GETSTATE(m)->MinKey); Py_VISIT(GETSTATE(m)->MaxKey); Py_VISIT(GETSTATE(m)->UTC); Py_VISIT(GETSTATE(m)->REType); return 0; } static int _cbson_clear(PyObject *m) { Py_CLEAR(GETSTATE(m)->Binary); Py_CLEAR(GETSTATE(m)->Code); Py_CLEAR(GETSTATE(m)->ObjectId); Py_CLEAR(GETSTATE(m)->DBRef); Py_CLEAR(GETSTATE(m)->RECompile); Py_CLEAR(GETSTATE(m)->Regex); Py_CLEAR(GETSTATE(m)->UUID); Py_CLEAR(GETSTATE(m)->Timestamp); Py_CLEAR(GETSTATE(m)->MinKey); Py_CLEAR(GETSTATE(m)->MaxKey); Py_CLEAR(GETSTATE(m)->UTC); Py_CLEAR(GETSTATE(m)->REType); return 0; } static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_cbson", NULL, sizeof(struct module_state), _CBSONMethods, NULL, _cbson_traverse, _cbson_clear, NULL }; PyMODINIT_FUNC PyInit__cbson(void) #else #define INITERROR return PyMODINIT_FUNC init_cbson(void) #endif { PyObject *m; PyObject *c_api_object; static void *_cbson_API[_cbson_API_POINTER_COUNT]; PyDateTime_IMPORT; if (PyDateTimeAPI == NULL) { INITERROR; } /* Export C API */ _cbson_API[_cbson_buffer_write_bytes_INDEX] = (void *) buffer_write_bytes; _cbson_API[_cbson_write_dict_INDEX] = (void *) write_dict; 
_cbson_API[_cbson_write_pair_INDEX] = (void *) write_pair; _cbson_API[_cbson_decode_and_write_pair_INDEX] = (void *) decode_and_write_pair; #if PY_VERSION_HEX >= 0x03010000 /* PyCapsule is new in python 3.1 */ c_api_object = PyCapsule_New((void *) _cbson_API, "_cbson._C_API", NULL); #else c_api_object = PyCObject_FromVoidPtr((void *) _cbson_API, NULL); #endif if (c_api_object == NULL) INITERROR; #if PY_MAJOR_VERSION >= 3 m = PyModule_Create(&moduledef); #else m = Py_InitModule("_cbson", _CBSONMethods); #endif if (m == NULL) { Py_DECREF(c_api_object); INITERROR; } /* Import several python objects */ if (_load_python_objects(m)) { Py_DECREF(c_api_object); #if PY_MAJOR_VERSION >= 3 Py_DECREF(m); #endif INITERROR; } if (PyModule_AddObject(m, "_C_API", c_api_object) < 0) { Py_DECREF(c_api_object); #if PY_MAJOR_VERSION >= 3 Py_DECREF(m); #endif INITERROR; } #if PY_MAJOR_VERSION >= 3 return m; #endif }