xtquant.xtbson.bson37

BSON (Binary JSON) encoding and decoding.

The mapping from Python types to BSON types is as follows:

=======================================  =============  ===================
Python Type                              BSON Type      Supported Direction
=======================================  =============  ===================
None                                     null           both
bool                                     boolean        both
int [1]                                  int32 / int64  py -> bson
bson.int64.Int64                         int64          both
float                                    number (real)  both
str                                      string         both
list                                     array          both
dict / SON                               object         both
datetime.datetime [2] [3]                date           both
bson.regex.Regex                         regex          both
compiled re [4]                          regex          py -> bson
bson.binary.Binary                       binary         both
bson.objectid.ObjectId                   oid            both
bson.dbref.DBRef                         dbref          both
None                                     undefined      bson -> py
bson.code.Code                           code           both
str                                      symbol         bson -> py
bytes [5]                                binary         both
=======================================  =============  ===================


  1. A Python int will be saved as a BSON int32 or BSON int64 depending on its size. A BSON int32 will always decode to a Python int. A BSON int64 will always decode to a bson.int64.Int64.

  2. datetime.datetime instances will be rounded to the nearest millisecond when saved.

  3. All datetime.datetime instances are treated as naive. Clients should always use UTC.

  4. bson.regex.Regex instances and regular expression objects from re.compile() are both saved as BSON regular expressions. BSON regular expressions are decoded as bson.regex.Regex instances.

  5. The bytes type is encoded as BSON binary with subtype 0. It will be decoded back to bytes. 

   1# Copyright 2009-present MongoDB, Inc.
   2#
   3# Licensed under the Apache License, Version 2.0 (the "License");
   4# you may not use this file except in compliance with the License.
   5# You may obtain a copy of the License at
   6#
   7# http://www.apache.org/licenses/LICENSE-2.0
   8#
   9# Unless required by applicable law or agreed to in writing, software
  10# distributed under the License is distributed on an "AS IS" BASIS,
  11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12# See the License for the specific language governing permissions and
  13# limitations under the License.
  14
  15"""BSON (Binary JSON) encoding and decoding.
  16
  17The mapping from Python types to BSON types is as follows:
  18
  19=======================================  =============  ===================
  20Python Type                              BSON Type      Supported Direction
  21=======================================  =============  ===================
  22None                                     null           both
  23bool                                     boolean        both
  24int [#int]_                              int32 / int64  py -> bson
  25`bson.int64.Int64`                       int64          both
  26float                                    number (real)  both
  27str                                      string         both
  28list                                     array          both
  29dict / `SON`                             object         both
  30datetime.datetime [#dt]_ [#dt2]_         date           both
  31`bson.regex.Regex`                       regex          both
  32compiled re [#re]_                       regex          py -> bson
  33`bson.binary.Binary`                     binary         both
  34`bson.objectid.ObjectId`                 oid            both
  35`bson.dbref.DBRef`                       dbref          both
  36None                                     undefined      bson -> py
  37`bson.code.Code`                         code           both
  38str                                      symbol         bson -> py
  39bytes [#bytes]_                          binary         both
  40=======================================  =============  ===================
  41
  42.. [#int] A Python int will be saved as a BSON int32 or BSON int64 depending
  43   on its size. A BSON int32 will always decode to a Python int. A BSON
  44   int64 will always decode to a :class:`~bson.int64.Int64`.
  45.. [#dt] datetime.datetime instances will be rounded to the nearest
  46   millisecond when saved
  47.. [#dt2] all datetime.datetime instances are treated as *naive*. clients
  48   should always use UTC.
  49.. [#re] :class:`~bson.regex.Regex` instances and regular expression
  50   objects from ``re.compile()`` are both saved as BSON regular expressions.
  51   BSON regular expressions are decoded as :class:`~bson.regex.Regex`
  52   instances.
  53.. [#bytes] The bytes type is encoded as BSON binary with
  54   subtype 0. It will be decoded back to bytes.
  55"""
  56
  57import datetime
  58import itertools
  59import os
  60import re
  61import struct
  62import sys
  63import uuid
  64from codecs import utf_8_decode as _utf_8_decode
  65from codecs import utf_8_encode as _utf_8_encode
  66from collections import abc as _abc
  67from typing import (
  68    IO,
  69    TYPE_CHECKING,
  70    Any,
  71    BinaryIO,
  72    Callable,
  73    Dict,
  74    Generator,
  75    Iterator,
  76    List,
  77    Mapping,
  78    MutableMapping,
  79    NoReturn,
  80    Optional,
  81    Sequence,
  82    Tuple,
  83    Type,
  84    TypeVar,
  85    Union,
  86    cast,
  87)
  88
  89from .binary import (
  90    ALL_UUID_SUBTYPES,
  91    CSHARP_LEGACY,
  92    JAVA_LEGACY,
  93    OLD_UUID_SUBTYPE,
  94    STANDARD,
  95    UUID_SUBTYPE,
  96    Binary,
  97    UuidRepresentation,
  98)
  99from .code import Code
 100from .codec_options import (
 101    DEFAULT_CODEC_OPTIONS,
 102    CodecOptions,
 103    DatetimeConversion,
 104    _DocumentType,
 105    _raw_document_class,
 106)
 107from .datetime_ms import (
 108    EPOCH_AWARE,
 109    EPOCH_NAIVE,
 110    DatetimeMS,
 111    _datetime_to_millis,
 112    _millis_to_datetime,
 113    utc,
 114)
 115from .dbref import DBRef
 116from .decimal128 import Decimal128
 117from .errors import InvalidBSON, InvalidDocument, InvalidStringData
 118from .int64 import Int64
 119from .max_key import MaxKey
 120from .min_key import MinKey
 121from .objectid import ObjectId
 122from .regex import Regex
 123from .son import RE_TYPE, SON
 124from .timestamp import Timestamp
 125
 126# Import some modules for type-checking only.
 127if TYPE_CHECKING:
 128    from array import array
 129    from mmap import mmap
 130
 131try:
 132    from . import _cbson  # type: ignore[attr-defined]
 133
 134    _USE_C = True
 135except ImportError:
 136    _USE_C = False
 137
# Public names exported by ``from <this package> import *``.
__all__ = [
    # Names imported from sibling modules (binary, code, codec_options,
    # dbref, decimal128, errors, int64, max_key, min_key, objectid, regex,
    # son, timestamp, datetime_ms) and re-exported here.
    "ALL_UUID_SUBTYPES",
    "CSHARP_LEGACY",
    "JAVA_LEGACY",
    "OLD_UUID_SUBTYPE",
    "STANDARD",
    "UUID_SUBTYPE",
    "Binary",
    "UuidRepresentation",
    "Code",
    "DEFAULT_CODEC_OPTIONS",
    "CodecOptions",
    "DBRef",
    "Decimal128",
    "InvalidBSON",
    "InvalidDocument",
    "InvalidStringData",
    "Int64",
    "MaxKey",
    "MinKey",
    "ObjectId",
    "Regex",
    "RE_TYPE",
    "SON",
    "Timestamp",
    "utc",
    "EPOCH_AWARE",
    "EPOCH_NAIVE",
    # One-byte BSON element type markers defined in this module.
    "BSONNUM",
    "BSONSTR",
    "BSONOBJ",
    "BSONARR",
    "BSONBIN",
    "BSONUND",
    "BSONOID",
    "BSONBOO",
    "BSONDAT",
    "BSONNUL",
    "BSONRGX",
    "BSONREF",
    "BSONCOD",
    "BSONSYM",
    "BSONCWS",
    "BSONINT",
    "BSONTIM",
    "BSONLON",
    "BSONDEC",
    "BSONMIN",
    "BSONMAX",
    # Helpers and the public API of this module.
    "get_data_and_view",
    "gen_list_name",
    "encode",
    "decode",
    "decode_all",
    "decode_iter",
    "decode_file_iter",
    "is_valid",
    "BSON",
    "has_c",
    # Also imported from codec_options / datetime_ms.
    "DatetimeConversion",
    "DatetimeMS",
]
 200
# One-byte type markers that prefix every element in an encoded document.
BSONNUM = b"\x01"  # Floating point
BSONSTR = b"\x02"  # UTF-8 string
BSONOBJ = b"\x03"  # Embedded document
BSONARR = b"\x04"  # Array
BSONBIN = b"\x05"  # Binary
BSONUND = b"\x06"  # Undefined
BSONOID = b"\x07"  # ObjectId
BSONBOO = b"\x08"  # Boolean
BSONDAT = b"\x09"  # UTC Datetime
BSONNUL = b"\x0A"  # Null
BSONRGX = b"\x0B"  # Regex
BSONREF = b"\x0C"  # DBRef
BSONCOD = b"\x0D"  # JavaScript code
BSONSYM = b"\x0E"  # Symbol
BSONCWS = b"\x0F"  # JavaScript code with scope
BSONINT = b"\x10"  # 32-bit int
BSONTIM = b"\x11"  # Timestamp
BSONLON = b"\x12"  # 64-bit int
BSONDEC = b"\x13"  # Decimal128
BSONMIN = b"\xFF"  # Min key
BSONMAX = b"\x7F"  # Max key
 222
 223
# Pre-compiled little-endian ("<") struct readers, bound once at import time
# so the decoders below avoid re-parsing the format string on every call.
_UNPACK_FLOAT_FROM = struct.Struct("<d").unpack_from  # 8-byte double
_UNPACK_INT = struct.Struct("<i").unpack  # signed 32-bit int, exact-size buffer
_UNPACK_INT_FROM = struct.Struct("<i").unpack_from  # signed 32-bit int at an offset
_UNPACK_LENGTH_SUBTYPE_FROM = struct.Struct("<iB").unpack_from  # int32 length + 1-byte subtype
_UNPACK_LONG_FROM = struct.Struct("<q").unpack_from  # signed 64-bit int
_UNPACK_TIMESTAMP_FROM = struct.Struct("<II").unpack_from  # two unsigned 32-bit ints
 230
 231
 232def get_data_and_view(data: Any) -> Tuple[Any, memoryview]:
 233    if isinstance(data, (bytes, bytearray)):
 234        return data, memoryview(data)
 235    view = memoryview(data)
 236    return view.tobytes(), view
 237
 238
 239def _raise_unknown_type(element_type: int, element_name: str) -> NoReturn:
 240    """Unknown type helper."""
 241    raise InvalidBSON(
 242        "Detected unknown BSON type %r for fieldname '%s'. Are "
 243        "you using the latest driver version?" % (chr(element_type).encode(), element_name)
 244    )
 245
 246
 247def _get_int(
 248    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 249) -> Tuple[int, int]:
 250    """Decode a BSON int32 to python int."""
 251    return _UNPACK_INT_FROM(data, position)[0], position + 4
 252
 253
 254def _get_c_string(data: Any, view: Any, position: int, opts: CodecOptions) -> Tuple[str, int]:
 255    """Decode a BSON 'C' string to python str."""
 256    end = data.index(b"\x00", position)
 257    return _utf_8_decode(view[position:end], opts.unicode_decode_error_handler, True)[0], end + 1
 258
 259
 260def _get_float(
 261    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 262) -> Tuple[float, int]:
 263    """Decode a BSON double to python float."""
 264    return _UNPACK_FLOAT_FROM(data, position)[0], position + 8
 265
 266
 267def _get_string(
 268    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy: Any
 269) -> Tuple[str, int]:
 270    """Decode a BSON string to python str."""
 271    length = _UNPACK_INT_FROM(data, position)[0]
 272    position += 4
 273    if length < 1 or obj_end - position < length:
 274        raise InvalidBSON("invalid string length")
 275    end = position + length - 1
 276    if data[end] != 0:
 277        raise InvalidBSON("invalid end of string")
 278    return _utf_8_decode(view[position:end], opts.unicode_decode_error_handler, True)[0], end + 1
 279
 280
 281def _get_object_size(data: Any, position: int, obj_end: int) -> Tuple[int, int]:
 282    """Validate and return a BSON document's size."""
 283    try:
 284        obj_size = _UNPACK_INT_FROM(data, position)[0]
 285    except struct.error as exc:
 286        raise InvalidBSON(str(exc))
 287    end = position + obj_size - 1
 288    if data[end] != 0:
 289        raise InvalidBSON("bad eoo")
 290    if end >= obj_end:
 291        raise InvalidBSON("invalid object length")
 292    # If this is the top-level document, validate the total size too.
 293    if position == 0 and obj_size != obj_end:
 294        raise InvalidBSON("invalid object length")
 295    return obj_size, end
 296
 297
 298def _get_object(
 299    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy: Any
 300) -> Tuple[Any, int]:
 301    """Decode a BSON subdocument to opts.document_class or bson.dbref.DBRef."""
 302    obj_size, end = _get_object_size(data, position, obj_end)
 303    if _raw_document_class(opts.document_class):
 304        return (opts.document_class(data[position : end + 1], opts), position + obj_size)
 305
 306    obj = _elements_to_dict(data, view, position + 4, end, opts)
 307
 308    position += obj_size
 309    # If DBRef validation fails, return a normal doc.
 310    if (
 311        isinstance(obj.get("$ref"), str)
 312        and "$id" in obj
 313        and isinstance(obj.get("$db"), (str, type(None)))
 314    ):
 315        return (DBRef(obj.pop("$ref"), obj.pop("$id", None), obj.pop("$db", None), obj), position)
 316    return obj, position
 317
 318
 319def _get_array(
 320    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str
 321) -> Tuple[Any, int]:
 322    """Decode a BSON array to python list."""
 323    size = _UNPACK_INT_FROM(data, position)[0]
 324    end = position + size - 1
 325    if data[end] != 0:
 326        raise InvalidBSON("bad eoo")
 327
 328    position += 4
 329    end -= 1
 330    result: List[Any] = []
 331
 332    # Avoid doing global and attribute lookups in the loop.
 333    append = result.append
 334    index = data.index
 335    getter = _ELEMENT_GETTER
 336    decoder_map = opts.type_registry._decoder_map
 337
 338    while position < end:
 339        element_type = data[position]
 340        # Just skip the keys.
 341        position = index(b"\x00", position) + 1
 342        try:
 343            value, position = getter[element_type](
 344                data, view, position, obj_end, opts, element_name
 345            )
 346        except KeyError:
 347            _raise_unknown_type(element_type, element_name)
 348
 349        if decoder_map:
 350            custom_decoder = decoder_map.get(type(value))
 351            if custom_decoder is not None:
 352                value = custom_decoder(value)
 353
 354        append(value)
 355
 356    if position != end + 1:
 357        raise InvalidBSON("bad array length")
 358    return result, position + 1
 359
 360
 361def _get_binary(
 362    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, dummy1: Any
 363) -> Tuple[Union[Binary, uuid.UUID], int]:
 364    """Decode a BSON binary to bson.binary.Binary or python UUID."""
 365    length, subtype = _UNPACK_LENGTH_SUBTYPE_FROM(data, position)
 366    position += 5
 367    if subtype == 2:
 368        length2 = _UNPACK_INT_FROM(data, position)[0]
 369        position += 4
 370        if length2 != length - 4:
 371            raise InvalidBSON("invalid binary (st 2) - lengths don't match!")
 372        length = length2
 373    end = position + length
 374    if length < 0 or end > obj_end:
 375        raise InvalidBSON("bad binary object length")
 376
 377    # Convert UUID subtypes to native UUIDs.
 378    if subtype in ALL_UUID_SUBTYPES:
 379        uuid_rep = opts.uuid_representation
 380        binary_value = Binary(data[position:end], subtype)
 381        if (
 382            (uuid_rep == UuidRepresentation.UNSPECIFIED)
 383            or (subtype == UUID_SUBTYPE and uuid_rep != STANDARD)
 384            or (subtype == OLD_UUID_SUBTYPE and uuid_rep == STANDARD)
 385        ):
 386            return binary_value, end
 387        return binary_value.as_uuid(uuid_rep), end
 388
 389    # Decode subtype 0 to 'bytes'.
 390    if subtype == 0:
 391        value = data[position:end]
 392    else:
 393        value = Binary(data[position:end], subtype)
 394
 395    return value, end
 396
 397
 398def _get_oid(
 399    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 400) -> Tuple[ObjectId, int]:
 401    """Decode a BSON ObjectId to bson.objectid.ObjectId."""
 402    end = position + 12
 403    return ObjectId(data[position:end]), end
 404
 405
 406def _get_boolean(
 407    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 408) -> Tuple[bool, int]:
 409    """Decode a BSON true/false to python True/False."""
 410    end = position + 1
 411    boolean_byte = data[position:end]
 412    if boolean_byte == b"\x00":
 413        return False, end
 414    elif boolean_byte == b"\x01":
 415        return True, end
 416    raise InvalidBSON("invalid boolean value: %r" % boolean_byte)
 417
 418
 419def _get_date(
 420    data: Any, view: Any, position: int, dummy0: int, opts: CodecOptions, dummy1: Any
 421) -> Tuple[Union[datetime.datetime, DatetimeMS], int]:
 422    """Decode a BSON datetime to python datetime.datetime."""
 423    return _millis_to_datetime(_UNPACK_LONG_FROM(data, position)[0], opts), position + 8
 424
 425
 426def _get_code(
 427    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str
 428) -> Tuple[Code, int]:
 429    """Decode a BSON code to bson.code.Code."""
 430    code, position = _get_string(data, view, position, obj_end, opts, element_name)
 431    return Code(code), position
 432
 433
 434def _get_code_w_scope(
 435    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str
 436) -> Tuple[Code, int]:
 437    """Decode a BSON code_w_scope to bson.code.Code."""
 438    code_end = position + _UNPACK_INT_FROM(data, position)[0]
 439    code, position = _get_string(data, view, position + 4, code_end, opts, element_name)
 440    scope, position = _get_object(data, view, position, code_end, opts, element_name)
 441    if position != code_end:
 442        raise InvalidBSON("scope outside of javascript code boundaries")
 443    return Code(code, scope), position
 444
 445
 446def _get_regex(
 447    data: Any, view: Any, position: int, dummy0: Any, opts: CodecOptions, dummy1: Any
 448) -> Tuple[Regex, int]:
 449    """Decode a BSON regex to bson.regex.Regex or a python pattern object."""
 450    pattern, position = _get_c_string(data, view, position, opts)
 451    bson_flags, position = _get_c_string(data, view, position, opts)
 452    bson_re = Regex(pattern, bson_flags)
 453    return bson_re, position
 454
 455
 456def _get_ref(
 457    data: Any, view: Any, position: int, obj_end: int, opts: CodecOptions, element_name: str
 458) -> Tuple[DBRef, int]:
 459    """Decode (deprecated) BSON DBPointer to bson.dbref.DBRef."""
 460    collection, position = _get_string(data, view, position, obj_end, opts, element_name)
 461    oid, position = _get_oid(data, view, position, obj_end, opts, element_name)
 462    return DBRef(collection, oid), position
 463
 464
 465def _get_timestamp(
 466    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 467) -> Tuple[Timestamp, int]:
 468    """Decode a BSON timestamp to bson.timestamp.Timestamp."""
 469    inc, timestamp = _UNPACK_TIMESTAMP_FROM(data, position)
 470    return Timestamp(timestamp, inc), position + 8
 471
 472
 473def _get_int64(
 474    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 475) -> Tuple[Int64, int]:
 476    """Decode a BSON int64 to bson.int64.Int64."""
 477    return Int64(_UNPACK_LONG_FROM(data, position)[0]), position + 8
 478
 479
 480def _get_decimal128(
 481    data: Any, view: Any, position: int, dummy0: Any, dummy1: Any, dummy2: Any
 482) -> Tuple[Decimal128, int]:
 483    """Decode a BSON decimal128 to bson.decimal128.Decimal128."""
 484    end = position + 16
 485    return Decimal128.from_bid(data[position:end]), end
 486
 487
# Dispatch table from the integer value of an element's type byte to the
# function that decodes its payload.
#
# Each decoder function's signature is:
#   - data: bytes
#   - view: memoryview that references `data`
#   - position: int, beginning of object in 'data' to decode
#   - obj_end: int, end of object to decode in 'data' if variable-length type
#   - opts: a CodecOptions
#   - element_name: str, key of the element being decoded (unused by most)
# and each returns a (value, new_position) tuple.
_ELEMENT_GETTER: Dict[int, Callable[..., Tuple[Any, int]]] = {
    ord(BSONNUM): _get_float,
    ord(BSONSTR): _get_string,
    ord(BSONOBJ): _get_object,
    ord(BSONARR): _get_array,
    ord(BSONBIN): _get_binary,
    # The lambdas decode types with no payload: position (w) comes back unchanged.
    ord(BSONUND): lambda u, v, w, x, y, z: (None, w),  # Deprecated undefined
    ord(BSONOID): _get_oid,
    ord(BSONBOO): _get_boolean,
    ord(BSONDAT): _get_date,
    ord(BSONNUL): lambda u, v, w, x, y, z: (None, w),
    ord(BSONRGX): _get_regex,
    ord(BSONREF): _get_ref,  # Deprecated DBPointer
    ord(BSONCOD): _get_code,
    ord(BSONSYM): _get_string,  # Deprecated symbol
    ord(BSONCWS): _get_code_w_scope,
    ord(BSONINT): _get_int,
    ord(BSONTIM): _get_timestamp,
    ord(BSONLON): _get_int64,
    ord(BSONDEC): _get_decimal128,
    ord(BSONMIN): lambda u, v, w, x, y, z: (MinKey(), w),
    ord(BSONMAX): lambda u, v, w, x, y, z: (MaxKey(), w),
}
 517
 518
 519if _USE_C:
 520
 521    def _element_to_dict(
 522        data: Any,
 523        view: Any,
 524        position: int,
 525        obj_end: int,
 526        opts: CodecOptions,
 527        raw_array: bool = False,
 528    ) -> Any:
 529        return _cbson._element_to_dict(data, position, obj_end, opts, raw_array)
 530
 531else:
 532
 533    def _element_to_dict(
 534        data: Any,
 535        view: Any,
 536        position: int,
 537        obj_end: int,
 538        opts: CodecOptions,
 539        raw_array: bool = False,
 540    ) -> Any:
 541        """Decode a single key, value pair."""
 542        element_type = data[position]
 543        position += 1
 544        element_name, position = _get_c_string(data, view, position, opts)
 545        if raw_array and element_type == ord(BSONARR):
 546            _, end = _get_object_size(data, position, len(data))
 547            return element_name, view[position : end + 1], end + 1
 548        try:
 549            value, position = _ELEMENT_GETTER[element_type](
 550                data, view, position, obj_end, opts, element_name
 551            )
 552        except KeyError:
 553            _raise_unknown_type(element_type, element_name)
 554
 555        if opts.type_registry._decoder_map:
 556            custom_decoder = opts.type_registry._decoder_map.get(type(value))
 557            if custom_decoder is not None:
 558                value = custom_decoder(value)
 559
 560        return element_name, value, position
 561
 562
# Type variable for the mutable mapping that _raw_to_dict fills and returns.
_T = TypeVar("_T", bound=MutableMapping[Any, Any])
 564
 565
 566def _raw_to_dict(
 567    data: Any, position: int, obj_end: int, opts: CodecOptions, result: _T, raw_array: bool = False
 568) -> _T:
 569    data, view = get_data_and_view(data)
 570    return _elements_to_dict(data, view, position, obj_end, opts, result, raw_array=raw_array)
 571
 572
 573def _elements_to_dict(
 574    data: Any,
 575    view: Any,
 576    position: int,
 577    obj_end: int,
 578    opts: CodecOptions,
 579    result: Any = None,
 580    raw_array: bool = False,
 581) -> Any:
 582    """Decode a BSON document into result."""
 583    if result is None:
 584        result = opts.document_class()
 585    end = obj_end - 1
 586    while position < end:
 587        key, value, position = _element_to_dict(
 588            data, view, position, obj_end, opts, raw_array=raw_array
 589        )
 590        result[key] = value
 591    if position != obj_end:
 592        raise InvalidBSON("bad object or element length")
 593    return result
 594
 595
 596def _bson_to_dict(data: Any, opts: CodecOptions) -> Any:
 597    """Decode a BSON string to document_class."""
 598    data, view = get_data_and_view(data)
 599    try:
 600        if _raw_document_class(opts.document_class):
 601            return opts.document_class(data, opts)
 602        _, end = _get_object_size(data, 0, len(data))
 603        return _elements_to_dict(data, view, 4, end, opts)
 604    except InvalidBSON:
 605        raise
 606    except Exception:
 607        # Change exception type to InvalidBSON but preserve traceback.
 608        _, exc_value, exc_tb = sys.exc_info()
 609        raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
 610
 611
# When the C extension imported successfully, its decoder replaces the
# pure-Python _bson_to_dict defined above.
if _USE_C:
    _bson_to_dict = _cbson._bson_to_dict  # noqa: F811
 614
 615
# Pre-compiled little-endian ("<") struct writers used by the encoders.
_PACK_FLOAT = struct.Struct("<d").pack  # 8-byte double
_PACK_INT = struct.Struct("<i").pack  # signed 32-bit int
_PACK_LENGTH_SUBTYPE = struct.Struct("<iB").pack  # int32 length + 1-byte subtype
_PACK_LONG = struct.Struct("<q").pack  # signed 64-bit int
_PACK_TIMESTAMP = struct.Struct("<II").pack  # two unsigned 32-bit ints
# Pre-encoded NUL-terminated array keys b"0\x00" .. b"999\x00".
_LIST_NAMES = tuple((str(i) + "\x00").encode("utf8") for i in range(1000))
 622
 623
 624def gen_list_name() -> Generator[bytes, None, None]:
 625    """Generate "keys" for encoded lists in the sequence
 626    b"0\x00", b"1\x00", b"2\x00", ...
 627
 628    The first 1000 keys are returned from a pre-built cache. All
 629    subsequent keys are generated on the fly.
 630    """
 631    for name in _LIST_NAMES:
 632        yield name
 633
 634    counter = itertools.count(1000)
 635    while True:
 636        yield (str(next(counter)) + "\x00").encode("utf8")
 637
 638
 639def _make_c_string_check(string: Union[str, bytes]) -> bytes:
 640    """Make a 'C' string, checking for embedded NUL characters."""
 641    if isinstance(string, bytes):
 642        if b"\x00" in string:
 643            raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
 644        try:
 645            _utf_8_decode(string, None, True)
 646            return string + b"\x00"
 647        except UnicodeError:
 648            raise InvalidStringData("strings in documents must be valid UTF-8: %r" % string)
 649    else:
 650        if "\x00" in string:
 651            raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
 652        return _utf_8_encode(string)[0] + b"\x00"
 653
 654
 655def _make_c_string(string: Union[str, bytes]) -> bytes:
 656    """Make a 'C' string."""
 657    if isinstance(string, bytes):
 658        try:
 659            _utf_8_decode(string, None, True)
 660            return string + b"\x00"
 661        except UnicodeError:
 662            raise InvalidStringData("strings in documents must be valid UTF-8: %r" % string)
 663    else:
 664        return _utf_8_encode(string)[0] + b"\x00"
 665
 666
 667def _make_name(string: str) -> bytes:
 668    """Make a 'C' string suitable for a BSON key."""
 669    # Keys can only be text in python 3.
 670    if "\x00" in string:
 671        raise InvalidDocument("BSON keys / regex patterns must not contain a NUL character")
 672    return _utf_8_encode(string)[0] + b"\x00"
 673
 674
 675def _encode_float(name: bytes, value: float, dummy0: Any, dummy1: Any) -> bytes:
 676    """Encode a float."""
 677    return b"\x01" + name + _PACK_FLOAT(value)
 678
 679
 680def _encode_bytes(name: bytes, value: bytes, dummy0: Any, dummy1: Any) -> bytes:
 681    """Encode a python bytes."""
 682    # Python3 special case. Store 'bytes' as BSON binary subtype 0.
 683    return b"\x05" + name + _PACK_INT(len(value)) + b"\x00" + value
 684
 685
 686def _encode_mapping(name: bytes, value: Any, check_keys: bool, opts: CodecOptions) -> bytes:
 687    """Encode a mapping type."""
 688    if _raw_document_class(value):
 689        return b"\x03" + name + value.raw
 690    data = b"".join([_element_to_bson(key, val, check_keys, opts) for key, val in value.items()])
 691    return b"\x03" + name + _PACK_INT(len(data) + 5) + data + b"\x00"
 692
 693
 694def _encode_dbref(name: bytes, value: DBRef, check_keys: bool, opts: CodecOptions) -> bytes:
 695    """Encode bson.dbref.DBRef."""
 696    buf = bytearray(b"\x03" + name + b"\x00\x00\x00\x00")
 697    begin = len(buf) - 4
 698
 699    buf += _name_value_to_bson(b"$ref\x00", value.collection, check_keys, opts)
 700    buf += _name_value_to_bson(b"$id\x00", value.id, check_keys, opts)
 701    if value.database is not None:
 702        buf += _name_value_to_bson(b"$db\x00", value.database, check_keys, opts)
 703    for key, val in value._DBRef__kwargs.items():
 704        buf += _element_to_bson(key, val, check_keys, opts)
 705
 706    buf += b"\x00"
 707    buf[begin : begin + 4] = _PACK_INT(len(buf) - begin)
 708    return bytes(buf)
 709
 710
 711def _encode_list(name: bytes, value: Sequence[Any], check_keys: bool, opts: CodecOptions) -> bytes:
 712    """Encode a list/tuple."""
 713    lname = gen_list_name()
 714    data = b"".join([_name_value_to_bson(next(lname), item, check_keys, opts) for item in value])
 715    return b"\x04" + name + _PACK_INT(len(data) + 5) + data + b"\x00"
 716
 717
 718def _encode_text(name: bytes, value: str, dummy0: Any, dummy1: Any) -> bytes:
 719    """Encode a python str."""
 720    bvalue = _utf_8_encode(value)[0]
 721    return b"\x02" + name + _PACK_INT(len(bvalue) + 1) + bvalue + b"\x00"
 722
 723
 724def _encode_binary(name: bytes, value: Binary, dummy0: Any, dummy1: Any) -> bytes:
 725    """Encode bson.binary.Binary."""
 726    subtype = value.subtype
 727    if subtype == 2:
 728        value = _PACK_INT(len(value)) + value  # type: ignore
 729    return b"\x05" + name + _PACK_LENGTH_SUBTYPE(len(value), subtype) + value
 730
 731
 732def _encode_uuid(name: bytes, value: uuid.UUID, dummy: Any, opts: CodecOptions) -> bytes:
 733    """Encode uuid.UUID."""
 734    uuid_representation = opts.uuid_representation
 735    binval = Binary.from_uuid(value, uuid_representation=uuid_representation)
 736    return _encode_binary(name, binval, dummy, opts)
 737
 738
 739def _encode_objectid(name: bytes, value: ObjectId, dummy: Any, dummy1: Any) -> bytes:
 740    """Encode bson.objectid.ObjectId."""
 741    return b"\x07" + name + value.binary
 742
 743
 744def _encode_bool(name: bytes, value: bool, dummy0: Any, dummy1: Any) -> bytes:
 745    """Encode a python boolean (True/False)."""
 746    return b"\x08" + name + (value and b"\x01" or b"\x00")
 747
 748
 749def _encode_datetime(name: bytes, value: datetime.datetime, dummy0: Any, dummy1: Any) -> bytes:
 750    """Encode datetime.datetime."""
 751    millis = _datetime_to_millis(value)
 752    return b"\x09" + name + _PACK_LONG(millis)
 753
 754
 755def _encode_datetime_ms(name: bytes, value: DatetimeMS, dummy0: Any, dummy1: Any) -> bytes:
 756    """Encode datetime.datetime."""
 757    millis = int(value)
 758    return b"\x09" + name + _PACK_LONG(millis)
 759
 760
 761def _encode_none(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
 762    """Encode python None."""
 763    return b"\x0A" + name
 764
 765
 766def _encode_regex(name: bytes, value: Regex, dummy0: Any, dummy1: Any) -> bytes:
 767    """Encode a python regex or bson.regex.Regex."""
 768    flags = value.flags
 769    # Python 3 common case
 770    if flags == re.UNICODE:
 771        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"u\x00"
 772    elif flags == 0:
 773        return b"\x0B" + name + _make_c_string_check(value.pattern) + b"\x00"
 774    else:
 775        sflags = b""
 776        if flags & re.IGNORECASE:
 777            sflags += b"i"
 778        if flags & re.LOCALE:
 779            sflags += b"l"
 780        if flags & re.MULTILINE:
 781            sflags += b"m"
 782        if flags & re.DOTALL:
 783            sflags += b"s"
 784        if flags & re.UNICODE:
 785            sflags += b"u"
 786        if flags & re.VERBOSE:
 787            sflags += b"x"
 788        sflags += b"\x00"
 789        return b"\x0B" + name + _make_c_string_check(value.pattern) + sflags
 790
 791
 792def _encode_code(name: bytes, value: Code, dummy: Any, opts: CodecOptions) -> bytes:
 793    """Encode bson.code.Code."""
 794    cstring = _make_c_string(value)
 795    cstrlen = len(cstring)
 796    if value.scope is None:
 797        return b"\x0D" + name + _PACK_INT(cstrlen) + cstring
 798    scope = _dict_to_bson(value.scope, False, opts, False)
 799    full_length = _PACK_INT(8 + cstrlen + len(scope))
 800    return b"\x0F" + name + full_length + _PACK_INT(cstrlen) + cstring + scope
 801
 802
 803def _encode_int(name: bytes, value: int, dummy0: Any, dummy1: Any) -> bytes:
 804    """Encode a python int."""
 805    if -2147483648 <= value <= 2147483647:
 806        return b"\x10" + name + _PACK_INT(value)
 807    else:
 808        try:
 809            return b"\x12" + name + _PACK_LONG(value)
 810        except struct.error:
 811            raise OverflowError("BSON can only handle up to 8-byte ints")
 812
 813
 814def _encode_timestamp(name: bytes, value: Any, dummy0: Any, dummy1: Any) -> bytes:
 815    """Encode bson.timestamp.Timestamp."""
 816    return b"\x11" + name + _PACK_TIMESTAMP(value.inc, value.time)
 817
 818
 819def _encode_long(name: bytes, value: Any, dummy0: Any, dummy1: Any) -> bytes:
 820    """Encode a python long (python 2.x)"""
 821    try:
 822        return b"\x12" + name + _PACK_LONG(value)
 823    except struct.error:
 824        raise OverflowError("BSON can only handle up to 8-byte ints")
 825
 826
 827def _encode_decimal128(name: bytes, value: Decimal128, dummy0: Any, dummy1: Any) -> bytes:
 828    """Encode bson.decimal128.Decimal128."""
 829    return b"\x13" + name + value.bid
 830
 831
 832def _encode_minkey(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
 833    """Encode bson.min_key.MinKey."""
 834    return b"\xFF" + name
 835
 836
 837def _encode_maxkey(name: bytes, dummy0: Any, dummy1: Any, dummy2: Any) -> bytes:
 838    """Encode bson.max_key.MaxKey."""
 839    return b"\x7F" + name
 840
 841
# Dispatch table from a value's exact Python type to its encoder function.
#
# Each encoder function's signature is:
#   - name: utf-8 bytes
#   - value: a Python data type, e.g. a Python int for _encode_int
#   - check_keys: bool, whether to check for invalid names
#   - opts: a CodecOptions
#
# NOTE: this dict is mutated at runtime. _name_value_to_bson adds an entry for
# every new subtype it resolves so later lookups hit the fast path.
# NOTE: insertion order matters. _BUILT_IN_TYPES is built from these keys and
# scanned in order with isinstance(), so ``bool`` must stay ahead of ``int``.
_ENCODERS = {
    bool: _encode_bool,
    bytes: _encode_bytes,
    datetime.datetime: _encode_datetime,
    DatetimeMS: _encode_datetime_ms,
    dict: _encode_mapping,
    float: _encode_float,
    int: _encode_int,
    list: _encode_list,
    str: _encode_text,
    tuple: _encode_list,
    type(None): _encode_none,
    uuid.UUID: _encode_uuid,
    Binary: _encode_binary,
    Int64: _encode_long,
    Code: _encode_code,
    DBRef: _encode_dbref,
    MaxKey: _encode_maxkey,
    MinKey: _encode_minkey,
    ObjectId: _encode_objectid,
    Regex: _encode_regex,
    RE_TYPE: _encode_regex,
    SON: _encode_mapping,
    Timestamp: _encode_timestamp,
    Decimal128: _encode_decimal128,
    # Special case. This will never be looked up directly.
    _abc.Mapping: _encode_mapping,
}
 875
 876
# Dispatch table from an object's integer ``_type_marker`` attribute to its
# encoder. _name_value_to_bson consults it when the exact type is not in
# _ENCODERS, which lets subclasses of these types be encoded without an
# isinstance() scan.
_MARKERS = {
    5: _encode_binary,
    7: _encode_objectid,
    11: _encode_regex,
    13: _encode_code,
    17: _encode_timestamp,
    18: _encode_long,
    100: _encode_dbref,
    127: _encode_maxkey,
    255: _encode_minkey,
}
 888
 889
 890_BUILT_IN_TYPES = tuple(t for t in _ENCODERS)
 891
 892
 893def _name_value_to_bson(
 894    name: bytes,
 895    value: Any,
 896    check_keys: bool,
 897    opts: CodecOptions,
 898    in_custom_call: bool = False,
 899    in_fallback_call: bool = False,
 900) -> bytes:
 901    """Encode a single name, value pair."""
 902    # First see if the type is already cached. KeyError will only ever
 903    # happen once per subtype.
 904    try:
 905        return _ENCODERS[type(value)](name, value, check_keys, opts)  # type: ignore
 906    except KeyError:
 907        pass
 908
 909    # Second, fall back to trying _type_marker. This has to be done
 910    # before the loop below since users could subclass one of our
 911    # custom types that subclasses a python built-in (e.g. Binary)
 912    marker = getattr(value, "_type_marker", None)
 913    if isinstance(marker, int) and marker in _MARKERS:
 914        func = _MARKERS[marker]
 915        # Cache this type for faster subsequent lookup.
 916        _ENCODERS[type(value)] = func
 917        return func(name, value, check_keys, opts)  # type: ignore
 918
 919    # Third, check if a type encoder is registered for this type.
 920    # Note that subtypes of registered custom types are not auto-encoded.
 921    if not in_custom_call and opts.type_registry._encoder_map:
 922        custom_encoder = opts.type_registry._encoder_map.get(type(value))
 923        if custom_encoder is not None:
 924            return _name_value_to_bson(
 925                name, custom_encoder(value), check_keys, opts, in_custom_call=True
 926            )
 927
 928    # Fourth, test each base type. This will only happen once for
 929    # a subtype of a supported base type. Unlike in the C-extensions, this
 930    # is done after trying the custom type encoder because checking for each
 931    # subtype is expensive.
 932    for base in _BUILT_IN_TYPES:
 933        if isinstance(value, base):
 934            func = _ENCODERS[base]
 935            # Cache this type for faster subsequent lookup.
 936            _ENCODERS[type(value)] = func
 937            return func(name, value, check_keys, opts)  # type: ignore
 938
 939    # As a last resort, try using the fallback encoder, if the user has
 940    # provided one.
 941    fallback_encoder = opts.type_registry._fallback_encoder
 942    if not in_fallback_call and fallback_encoder is not None:
 943        return _name_value_to_bson(
 944            name, fallback_encoder(value), check_keys, opts, in_fallback_call=True
 945        )
 946
 947    raise InvalidDocument("cannot encode object: %r, of type: %r" % (value, type(value)))
 948
 949
 950def _element_to_bson(key: Any, value: Any, check_keys: bool, opts: CodecOptions) -> bytes:
 951    """Encode a single key, value pair."""
 952    if not isinstance(key, str):
 953        raise InvalidDocument("documents must have only string keys, key was %r" % (key,))
 954    if check_keys:
 955        if key.startswith("$"):
 956            raise InvalidDocument("key %r must not start with '$'" % (key,))
 957        if "." in key:
 958            raise InvalidDocument("key %r must not contain '.'" % (key,))
 959
 960    name = _make_name(key)
 961    return _name_value_to_bson(name, value, check_keys, opts)
 962
 963
 964def _dict_to_bson(doc: Any, check_keys: bool, opts: CodecOptions, top_level: bool = True) -> bytes:
 965    """Encode a document to BSON."""
 966    if _raw_document_class(doc):
 967        return cast(bytes, doc.raw)
 968    try:
 969        elements = []
 970        if top_level and "_id" in doc:
 971            elements.append(_name_value_to_bson(b"_id\x00", doc["_id"], check_keys, opts))
 972        for key, value in doc.items():
 973            if not top_level or key != "_id":
 974                elements.append(_element_to_bson(key, value, check_keys, opts))
 975    except AttributeError:
 976        raise TypeError("encoder expected a mapping type but got: %r" % (doc,))
 977
 978    encoded = b"".join(elements)
 979    return _PACK_INT(len(encoded) + 5) + encoded + b"\x00"
 980
 981
# When _USE_C is set, rebind the module-level name to the _cbson
# implementation; the pure-Python definition above is then shadowed.
if _USE_C:
    _dict_to_bson = _cbson._dict_to_bson  # noqa: F811
 984
 985
# Single shared exception instance raised by the public entry points when
# ``codec_options`` is not a CodecOptions.
_CODEC_OPTIONS_TYPE_ERROR = TypeError("codec_options must be an instance of CodecOptions")


# Type aliases used in the public signatures below: an input document, and any
# bytes-like buffer accepted by the decoders.
_DocumentIn = Mapping[str, Any]
_ReadableBuffer = Union[bytes, memoryview, "mmap", "array"]
 991
 992
 993def encode(
 994    document: _DocumentIn,
 995    check_keys: bool = False,
 996    codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
 997) -> bytes:
 998    """Encode a document to BSON.
 999
1000    A document can be any mapping type (like :class:`dict`).
1001
1002    Raises :class:`TypeError` if `document` is not a mapping type,
1003    or contains keys that are not instances of
1004    :class:`basestring` (:class:`str` in python 3). Raises
1005    :class:`~bson.errors.InvalidDocument` if `document` cannot be
1006    converted to :class:`BSON`.
1007
1008    :Parameters:
1009      - `document`: mapping type representing a document
1010      - `check_keys` (optional): check if keys start with '$' or
1011        contain '.', raising :class:`~bson.errors.InvalidDocument` in
1012        either case
1013      - `codec_options` (optional): An instance of
1014        :class:`~bson.codec_options.CodecOptions`.
1015
1016    .. versionadded:: 3.9
1017    """
1018    if not isinstance(codec_options, CodecOptions):
1019        raise _CODEC_OPTIONS_TYPE_ERROR
1020
1021    return _dict_to_bson(document, check_keys, codec_options)
1022
1023
def decode(
    data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> _DocumentType:
    """Decode BSON to a document.

    By default, returns a BSON document represented as a Python
    :class:`dict`. To use a different :class:`MutableMapping` class,
    configure a :class:`~bson.codec_options.CodecOptions`::

        >>> import collections  # From Python standard library.
        >>> import bson
        >>> from .codec_options import CodecOptions
        >>> data = bson.encode({'a': 1})
        >>> decoded_doc = bson.decode(data)
        >>> type(decoded_doc)
        <class 'dict'>
        >>> options = CodecOptions(document_class=collections.OrderedDict)
        >>> decoded_doc = bson.decode(data, codec_options=options)
        >>> type(decoded_doc)
        <class 'collections.OrderedDict'>

    Raises :class:`TypeError` if `codec_options` is given and is not a
    :class:`~bson.codec_options.CodecOptions`.

    :Parameters:
      - `data`: the BSON to decode. Any bytes-like object that implements
        the buffer protocol.
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`. A falsy value selects
        the default options.

    .. versionadded:: 3.9
    """
    opts: CodecOptions = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if isinstance(opts, CodecOptions):
        return _bson_to_dict(data, opts)

    raise _CODEC_OPTIONS_TYPE_ERROR
1057
1058
def _decode_all(data: _ReadableBuffer, opts: "CodecOptions[_DocumentType]") -> List[_DocumentType]:
    """Decode a buffer of concatenated BSON documents.

    :Parameters:
      - `data`: bytes-like object holding zero or more BSON documents
      - `opts`: a CodecOptions; its ``document_class`` decides whether each
        document is wrapped raw or fully decoded

    :Returns:
      A list with one entry per document, in buffer order.

    Raises :class:`InvalidBSON` for malformed input. Any other exception
    raised while decoding is converted to :class:`InvalidBSON`, keeping the
    original traceback.
    """
    data, view = get_data_and_view(data)
    data_len = len(data)
    docs: List[_DocumentType] = []
    position = 0
    end = data_len - 1
    use_raw = _raw_document_class(opts.document_class)
    try:
        while position < end:
            obj_size = _UNPACK_INT_FROM(data, position)[0]
            # A BSON document is at least 5 bytes (int32 length + trailing
            # NUL). Rejecting smaller sizes here also guarantees forward
            # progress: a declared size <= 0 would otherwise leave `position`
            # stuck (or move it backwards) on the raw-document path.
            if obj_size < 5:
                raise InvalidBSON("invalid object size")
            if data_len - position < obj_size:
                raise InvalidBSON("invalid object size")
            obj_end = position + obj_size - 1
            if data[obj_end] != 0:
                raise InvalidBSON("bad eoo")
            if use_raw:
                docs.append(opts.document_class(data[position : obj_end + 1], opts))  # type: ignore
            else:
                # Skip the 4-byte length prefix; elements start right after it.
                docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts))
            position += obj_size
        return docs
    except InvalidBSON:
        raise
    except Exception:
        # Change exception type to InvalidBSON but preserve traceback.
        _, exc_value, exc_tb = sys.exc_info()
        raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
1087
1088
# When _USE_C is set, rebind the module-level name to the _cbson
# implementation; the pure-Python definition above is then shadowed.
if _USE_C:
    _decode_all = _cbson._decode_all  # noqa: F811
1091
1092
def decode_all(
    data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> List[_DocumentType]:
    """Decode BSON data to multiple documents.

    `data` must be a bytes-like object implementing the buffer protocol that
    provides concatenated, valid, BSON-encoded documents.

    Raises :class:`TypeError` if `codec_options` is given and is not a
    :class:`~bson.codec_options.CodecOptions`.

    :Parameters:
      - `data`: BSON data
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`. A falsy value selects
        the default options.

    .. versionchanged:: 3.9
       Supports bytes-like objects that implement the buffer protocol.

    .. versionchanged:: 3.0
       Removed `compile_re` option: PyMongo now always represents BSON regular
       expressions as :class:`~bson.regex.Regex` objects. Use
       :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
       BSON regular expression to a Python regular expression object.

       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.
    """
    opts = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if isinstance(opts, CodecOptions):
        return _decode_all(data, opts)  # type: ignore[arg-type]

    raise _CODEC_OPTIONS_TYPE_ERROR
1123
1124
def _decode_selective(rawdoc: Any, fields: Any, codec_options: Any) -> Mapping[Any, Any]:
    """Copy `rawdoc` into a new document, fully decoding only selected fields.

    :Parameters:
      - `rawdoc`: a raw document exposing ``items()`` and ``raw``
      - `fields`: mapping of keys to either ``1`` (take the value from a full
        decode of `rawdoc` with `codec_options`) or a nested mapping (recurse
        into that sub-document)
      - `codec_options`: options used for the full decode; its
        ``document_class`` is also the container type of the result, unless
        it is a raw document class, in which case a plain dict is used

    :Returns:
      The new document. Keys absent from `fields` keep their value from
      ``rawdoc.items()`` unchanged.
    """
    if _raw_document_class(codec_options.document_class):
        # If document_class is RawBSONDocument, use vanilla dictionary for
        # decoding command response.
        doc = {}
    else:
        # Else, use the specified document_class.
        doc = codec_options.document_class()

    # The full decode does not depend on the key, so do it at most once and
    # only if some key actually asks for it.
    fully_decoded = None
    for key, value in rawdoc.items():
        if key in fields:
            if fields[key] == 1:
                if fully_decoded is None:
                    fully_decoded = _bson_to_dict(rawdoc.raw, codec_options)
                doc[key] = fully_decoded[key]
            else:
                doc[key] = _decode_selective(value, fields[key], codec_options)
        else:
            doc[key] = value
    return doc
1142
1143
def _array_of_documents_to_buffer(view: memoryview) -> bytes:
    """Concatenate the raw bytes of every document held in a BSON array.

    `view` covers one BSON array. For each element everything up to and
    including the NUL that ends its key is skipped, and the document that
    follows is copied out as-is.

    Raises :class:`InvalidBSON` if the walk does not finish exactly at the
    end offset reported for the array.
    """
    offset = 0
    _, array_end = _get_object_size(view, offset, len(view))
    offset += 4  # step over the array's own length prefix
    pieces: List[memoryview] = []
    while offset < array_end - 1:
        # Just skip the keys.
        while view[offset] != 0:
            offset += 1
        offset += 1
        doc_size, _ = _get_object_size(view, offset, array_end)
        pieces.append(view[offset : offset + doc_size])
        offset += doc_size
    if offset != array_end:
        raise InvalidBSON("bad object or element length")
    return b"".join(pieces)
1162
1163
# When _USE_C is set, rebind the module-level name to the _cbson
# implementation; the pure-Python definition above is then shadowed.
if _USE_C:
    _array_of_documents_to_buffer = _cbson._array_of_documents_to_buffer  # noqa: F811
1166
1167
1168def _convert_raw_document_lists_to_streams(document: Any) -> None:
1169    """Convert raw array of documents to a stream of BSON documents."""
1170    cursor = document.get("cursor")
1171    if not cursor:
1172        return
1173    for key in ("firstBatch", "nextBatch"):
1174        batch = cursor.get(key)
1175        if not batch:
1176            continue
1177        data = _array_of_documents_to_buffer(batch)
1178        if data:
1179            cursor[key] = [data]
1180        else:
1181            cursor[key] = []
1182
1183
def _decode_all_selective(data: Any, codec_options: CodecOptions, fields: Any) -> List[Any]:
    """Decode BSON data to a single document while using user-provided
    custom decoding logic.

    `data` must be a string representing a valid, BSON-encoded document.

    :Parameters:
      - `data`: BSON data
      - `codec_options`: An instance of
        :class:`~bson.codec_options.CodecOptions` with user-specified type
        decoders. If no decoders are found, this method is the same as
        ``decode_all``.
      - `fields`: Map of document namespaces where data that needs
        to be custom decoded lives or None. For example, to custom decode a
        list of objects in 'field1.subfield1', the specified value should be
        ``{'field1': {'subfield1': 1}}``. If ``fields`` is an empty map or
        None, this method is the same as ``decode_all`` called without the
        type registry.

    :Returns:
      - `document_list`: Single-member list containing the decoded document.

    .. versionadded:: 3.8
    """
    # No custom decoders registered: plain decode with the caller's options.
    if not codec_options.type_registry._decoder_map:
        return decode_all(data, codec_options)

    # Decoders exist but nothing is marked for them: decode without them.
    if not fields:
        return decode_all(data, codec_options.with_options(type_registry=None))

    # Decode documents for internal use.
    from .raw_bson import RawBSONDocument

    raw_options = codec_options.with_options(document_class=RawBSONDocument, type_registry=None)
    raw_document = _bson_to_dict(data, raw_options)
    selectively_decoded = _decode_selective(raw_document, fields, codec_options)
    return [selectively_decoded]
1227
1228
def decode_iter(
    data: bytes, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> Iterator[_DocumentType]:
    """Decode BSON data to multiple documents as a generator.

    Works similarly to the decode_all function, but yields one document at a
    time.

    `data` must be a string of concatenated, valid, BSON-encoded
    documents.

    Because this is a generator, the :class:`TypeError` for an invalid
    `codec_options` is raised when iteration starts, not at call time.

    :Parameters:
      - `data`: BSON data
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    .. versionchanged:: 3.0
       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionadded:: 2.8
    """
    opts = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if not isinstance(opts, CodecOptions):
        raise _CODEC_OPTIONS_TYPE_ERROR

    offset = 0
    last_index = len(data) - 1
    while offset < last_index:
        # The first four bytes of each document hold its total size.
        doc_size = _UNPACK_INT_FROM(data, offset)[0]
        next_offset = offset + doc_size
        doc_bytes = data[offset:next_offset]
        offset = next_offset

        yield _bson_to_dict(doc_bytes, opts)
1263
1264
def decode_file_iter(
    file_obj: Union[BinaryIO, IO], codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> Iterator[_DocumentType]:
    """Decode bson data from a file to multiple documents as a generator.

    Works similarly to the decode_all function, but reads from the file object
    in chunks and parses bson in chunks, yielding one document at a time.

    Raises :class:`TypeError` if `codec_options` is given and is not a
    :class:`~bson.codec_options.CodecOptions`, and :class:`InvalidBSON` if
    the file ends in the middle of a document's 4-byte size prefix. Because
    this is a generator, both are raised during iteration, not at call time.

    :Parameters:
      - `file_obj`: A file object containing BSON data.
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    .. versionchanged:: 3.0
       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionadded:: 2.8
    """
    opts = codec_options or DEFAULT_CODEC_OPTIONS
    # Same validation as decode(), decode_all() and decode_iter().
    if not isinstance(opts, CodecOptions):
        raise _CODEC_OPTIONS_TYPE_ERROR

    while True:
        # Read size of next object.
        size_data = file_obj.read(4)
        if not size_data:
            break  # Finished with file normally.
        elif len(size_data) != 4:
            raise InvalidBSON("cut off in middle of objsize")
        # The size counts the 4 prefix bytes already read.
        obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4
        # max(0, ...) guards against a corrupt size below 4, since a negative
        # argument to read() conventionally means "read to EOF".
        elements = size_data + file_obj.read(max(0, obj_size))
        yield _bson_to_dict(elements, opts)
1295
1296
def is_valid(bson: bytes) -> bool:
    """Check that the given bytes represent valid :class:`BSON` data.

    Raises :class:`TypeError` if `bson` is not an instance of
    :class:`bytes`. Returns ``True`` if `bson` decodes with the default
    codec options, ``False`` if decoding raises any exception.

    :Parameters:
      - `bson`: the data to be validated
    """
    if not isinstance(bson, bytes):
        raise TypeError("BSON data must be an instance of a subclass of bytes")

    try:
        _bson_to_dict(bson, DEFAULT_CODEC_OPTIONS)
    except Exception:
        # Any decoding failure means "not valid".
        return False
    return True
1315
1316
class BSON(bytes):
    """BSON (Binary JSON) data, stored as a :class:`bytes` subclass.

    .. warning:: Using this class to encode and decode BSON adds a performance
       cost. For better performance use the module level functions
       :func:`encode` and :func:`decode` instead.
    """

    @classmethod
    def encode(
        cls: Type["BSON"],
        document: _DocumentIn,
        check_keys: bool = False,
        codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
    ) -> "BSON":
        """Encode a document to a new :class:`BSON` instance.

        A document can be any mapping type (like :class:`dict`). This wraps
        the result of the module level :func:`encode` in `cls`, so it raises
        whatever that function raises.

        :Parameters:
          - `document`: mapping type representing a document
          - `check_keys` (optional): check if keys start with '$' or
            contain '.', raising :class:`~bson.errors.InvalidDocument` in
            either case
          - `codec_options` (optional): An instance of
            :class:`~bson.codec_options.CodecOptions`.

        .. versionchanged:: 3.0
           Replaced `uuid_subtype` option with `codec_options`.
        """
        # Inside a method body ``encode`` resolves to the module level function.
        encoded = encode(document, check_keys, codec_options)
        return cls(encoded)

    def decode(self, codec_options: "CodecOptions[_DocumentType]" = DEFAULT_CODEC_OPTIONS) -> _DocumentType:  # type: ignore[override,assignment]
        """Decode this BSON data.

        By default, returns a BSON document represented as a Python
        :class:`dict`. To use a different :class:`MutableMapping` class,
        configure a :class:`~bson.codec_options.CodecOptions`::

            >>> import collections  # From Python standard library.
            >>> import bson
            >>> from .codec_options import CodecOptions
            >>> data = bson.BSON.encode({'a': 1})
            >>> decoded_doc = bson.BSON(data).decode()
            >>> type(decoded_doc)
            <class 'dict'>
            >>> options = CodecOptions(document_class=collections.OrderedDict)
            >>> decoded_doc = bson.BSON(data).decode(codec_options=options)
            >>> type(decoded_doc)
            <class 'collections.OrderedDict'>

        :Parameters:
          - `codec_options` (optional): An instance of
            :class:`~bson.codec_options.CodecOptions`.

        .. versionchanged:: 3.0
           Removed `compile_re` option: PyMongo now always represents BSON
           regular expressions as :class:`~bson.regex.Regex` objects. Use
           :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
           BSON regular expression to a Python regular expression object.

           Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
           `codec_options`.
        """
        # Inside a method body ``decode`` resolves to the module level function.
        document = decode(self, codec_options)
        return document
1387
1388
def has_c() -> bool:
    """Report whether the C extension is in use (the module's ``_USE_C`` flag)."""
    return _USE_C
1392
1393
def _after_fork():
    """Release the ObjectId increment lock if it is currently held.

    Registered as an ``after_in_child`` fork hook where the platform allows.
    """
    inc_lock = ObjectId._inc_lock
    if inc_lock.locked():
        inc_lock.release()
1398
1399
# os.register_at_fork does not exist everywhere, hence the hasattr guard.
# Where it does, _after_fork is run in the child process after a fork.
if hasattr(os, "register_at_fork"):
    # This will run in the same thread as the fork was called.
    # If we fork in a critical region on the same thread, it should break.
    # This is fine since we would never call fork directly from a critical region.
    os.register_at_fork(after_in_child=_after_fork)
# Binary subtypes that Binary.as_uuid accepts as holding a UUID.
ALL_UUID_SUBTYPES = (3, 4)
# NOTE(review): presumably the UuidRepresentation.CSHARP_LEGACY value - confirm.
CSHARP_LEGACY = 6
# NOTE(review): presumably the UuidRepresentation.JAVA_LEGACY value - confirm.
JAVA_LEGACY = 5
# Subtype Binary.from_uuid assigns for the legacy UUID representations.
OLD_UUID_SUBTYPE = 3
# NOTE(review): presumably the UuidRepresentation.STANDARD value - confirm.
STANDARD = 4
# Subtype Binary.from_uuid assigns for UuidRepresentation.STANDARD.
UUID_SUBTYPE = 4
class Binary(builtins.bytes):
class Binary(bytes):
    """Representation of BSON binary data.

    Plain Python strings are encoded as the BSON string type, so binary
    payloads are wrapped in this class to mark them as binary data (with a
    subtype) when encoding to BSON.

    Raises TypeError if `subtype` is not an instance of :class:`int`.
    Raises ValueError if `subtype` is not in [0, 256).

    .. note::
      In python 3 instances of Binary with subtype 0 will be decoded
      directly to :class:`bytes`.

    :Parameters:
      - `data`: the binary data to represent. Can be any bytes-like type
        that implements the buffer protocol.
      - `subtype` (optional): the `binary subtype
        <https://bsonspec.org/spec.html>`_
        to use

    .. versionchanged:: 3.9
      Support any bytes-like type that implements the buffer protocol.
    """

    _type_marker = 5
    __subtype: int

    def __new__(
        cls: Type["Binary"],
        data: Union[memoryview, bytes, "_mmap", "_array"],
        subtype: int = BINARY_SUBTYPE,
    ) -> "Binary":
        if not isinstance(subtype, int):
            raise TypeError("subtype must be an instance of int")
        if not (0 <= subtype < 256):
            raise ValueError("subtype must be contained in [0, 256)")
        # Support any type that implements the buffer protocol.
        payload = memoryview(data).tobytes()
        instance = bytes.__new__(cls, payload)
        instance.__subtype = subtype
        return instance

    @classmethod
    def from_uuid(
        cls: Type["Binary"], uuid: UUID, uuid_representation: int = UuidRepresentation.STANDARD
    ) -> "Binary":
        """Create a BSON Binary object from a Python UUID.

        Builds a :class:`~bson.binary.Binary` from a :class:`uuid.UUID`,
        laying out the bytes in the order implied by the provided
        ``uuid_representation``.

        Raises :exc:`TypeError` if `uuid` is not an instance of
        :class:`~uuid.UUID`. Raises :exc:`ValueError` if
        `uuid_representation` is not a known representation or is
        ``UNSPECIFIED``.

        :Parameters:
          - `uuid`: A :class:`uuid.UUID` instance.
          - `uuid_representation`: A member of
            :class:`~bson.binary.UuidRepresentation`. Default:
            :const:`~bson.binary.UuidRepresentation.STANDARD`.
            See :ref:`handling-uuid-data-example` for details.

        .. versionadded:: 3.11
        """
        if not isinstance(uuid, UUID):
            raise TypeError("uuid must be an instance of uuid.UUID")

        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
            raise ValueError(
                "uuid_representation must be a value from .binary.UuidRepresentation"
            )

        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            raise ValueError(
                "cannot encode native uuid.UUID with "
                "UuidRepresentation.UNSPECIFIED. UUIDs can be manually "
                "converted to bson.Binary instances using "
                "bson.Binary.from_uuid() or a different UuidRepresentation "
                "can be configured. See the documentation for "
                "UuidRepresentation for more information."
            )

        # The three legacy representations share the old subtype and differ
        # only in byte order; everything else is STANDARD.
        if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
            return cls(uuid.bytes, OLD_UUID_SUBTYPE)
        if uuid_representation == UuidRepresentation.JAVA_LEGACY:
            raw = uuid.bytes
            # Each 8-byte half is reversed independently.
            return cls(raw[0:8][::-1] + raw[8:16][::-1], OLD_UUID_SUBTYPE)
        if uuid_representation == UuidRepresentation.CSHARP_LEGACY:
            return cls(uuid.bytes_le, OLD_UUID_SUBTYPE)
        # uuid_representation == UuidRepresentation.STANDARD
        return cls(uuid.bytes, UUID_SUBTYPE)

    def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUID:
        """Create a Python UUID from this BSON Binary object.

        Decodes this binary object as a native :class:`uuid.UUID` instance
        with the provided ``uuid_representation``.

        Raises :exc:`ValueError` if this instance's subtype is not a UUID
        subtype, if `uuid_representation` is unknown or ``UNSPECIFIED``, or
        if the subtype does not match the requested representation.

        :Parameters:
          - `uuid_representation`: A member of
            :class:`~bson.binary.UuidRepresentation`. Default:
            :const:`~bson.binary.UuidRepresentation.STANDARD`.
            See :ref:`handling-uuid-data-example` for details.

        .. versionadded:: 3.11
        """
        if self.subtype not in ALL_UUID_SUBTYPES:
            raise ValueError("cannot decode subtype %s as a uuid" % (self.subtype,))

        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
            raise ValueError(
                "uuid_representation must be a value from .binary.UuidRepresentation"
            )

        if uuid_representation == UuidRepresentation.UNSPECIFIED:
            raise ValueError("uuid_representation cannot be UNSPECIFIED")

        has_old_subtype = self.subtype == OLD_UUID_SUBTYPE
        if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
            if has_old_subtype:
                return UUID(bytes=self)
        elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
            if has_old_subtype:
                return UUID(bytes=self[0:8][::-1] + self[8:16][::-1])
        elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
            if has_old_subtype:
                return UUID(bytes_le=self)
        else:
            # uuid_representation == UuidRepresentation.STANDARD
            if self.subtype == UUID_SUBTYPE:
                return UUID(bytes=self)

        # Reached only when subtype and representation do not match.
        raise ValueError(
            "cannot decode subtype %s to %s"
            % (self.subtype, UUID_REPRESENTATION_NAMES[uuid_representation])
        )

    @property
    def subtype(self) -> int:
        """Subtype of this binary data."""
        return self.__subtype

    def __getnewargs__(self) -> Tuple[bytes, int]:  # type: ignore[override]
        # Work around http://bugs.python.org/issue7382
        payload = super().__getnewargs__()[0]
        if not isinstance(payload, bytes):
            payload = payload.encode("latin-1")
        return payload, self.__subtype

    def __eq__(self, other: Any) -> bool:
        # Anything that is not a Binary compares unequal. NotImplemented is
        # deliberately not returned: the inherited comparison would then
        # treat a Binary as equal to plain data with the same content.
        if not isinstance(other, Binary):
            return False
        return (self.__subtype, bytes(self)) == (other.subtype, bytes(other))

    def __hash__(self) -> int:
        # Mix the subtype in so the hash is consistent with __eq__.
        return super().__hash__() ^ hash(self.__subtype)

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __repr__(self):
        return "Binary(%s, %s)" % (bytes.__repr__(self), self.__subtype)

Representation of BSON binary data.

This is necessary because we want to represent Python strings as the BSON string type. We need to wrap binary data so we can tell the difference between what should be considered binary data and what should be considered a string when we encode to BSON.

Raises TypeError if data is not an instance of bytes (str in python 2) or subtype is not an instance of int. Raises ValueError if subtype is not in [0, 256).

Note: In Python 3, instances of Binary with subtype 0 will be decoded directly to bytes.

Parameters:

  • data: the binary data to represent. Can be any bytes-like type that implements the buffer protocol.
  • subtype (optional): the binary subtype to use

Changed in version 3.9: Support any bytes-like type that implements the buffer protocol.

@classmethod
def from_uuid( cls: Type[xtquant.xtbson.bson37.Binary], uuid: uuid.UUID, uuid_representation: int = 4) -> xtquant.xtbson.bson37.Binary:
236    @classmethod
237    def from_uuid(
238        cls: Type["Binary"], uuid: UUID, uuid_representation: int = UuidRepresentation.STANDARD
239    ) -> "Binary":
240        """Create a BSON Binary object from a Python UUID.
241
242        Creates a :class:`~bson.binary.Binary` object from a
243        :class:`uuid.UUID` instance. Assumes that the native
244        :class:`uuid.UUID` instance uses the byte-order implied by the
245        provided ``uuid_representation``.
246
247        Raises :exc:`TypeError` if `uuid` is not an instance of
248        :class:`~uuid.UUID`.
249
250        :Parameters:
251          - `uuid`: A :class:`uuid.UUID` instance.
252          - `uuid_representation`: A member of
253            :class:`~bson.binary.UuidRepresentation`. Default:
254            :const:`~bson.binary.UuidRepresentation.STANDARD`.
255            See :ref:`handling-uuid-data-example` for details.
256
257        .. versionadded:: 3.11
258        """
259        if not isinstance(uuid, UUID):
260            raise TypeError("uuid must be an instance of uuid.UUID")
261
262        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
263            raise ValueError(
264                "uuid_representation must be a value from .binary.UuidRepresentation"
265            )
266
267        if uuid_representation == UuidRepresentation.UNSPECIFIED:
268            raise ValueError(
269                "cannot encode native uuid.UUID with "
270                "UuidRepresentation.UNSPECIFIED. UUIDs can be manually "
271                "converted to bson.Binary instances using "
272                "bson.Binary.from_uuid() or a different UuidRepresentation "
273                "can be configured. See the documentation for "
274                "UuidRepresentation for more information."
275            )
276
277        subtype = OLD_UUID_SUBTYPE
278        if uuid_representation == UuidRepresentation.PYTHON_LEGACY:
279            payload = uuid.bytes
280        elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
281            from_uuid = uuid.bytes
282            payload = from_uuid[0:8][::-1] + from_uuid[8:16][::-1]
283        elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
284            payload = uuid.bytes_le
285        else:
286            # uuid_representation == UuidRepresentation.STANDARD
287            subtype = UUID_SUBTYPE
288            payload = uuid.bytes
289
290        return cls(payload, subtype)

Create a BSON Binary object from a Python UUID.

Creates a ~bson.binary.Binary object from a uuid.UUID instance. Assumes that the native uuid.UUID instance uses the byte-order implied by the provided uuid_representation.

Raises TypeError if uuid is not an instance of ~uuid.UUID.

:Parameters:

  • uuid: A uuid.UUID instance.
  • uuid_representation: A member of ~bson.binary.UuidRepresentation. Default: ~bson.binary.UuidRepresentation.STANDARD. See :ref:handling-uuid-data-example for details.

New in version 3.11.

def as_uuid(self, uuid_representation: int = 4) -> uuid.UUID:
292    def as_uuid(self, uuid_representation: int = UuidRepresentation.STANDARD) -> UUID:
293        """Create a Python UUID from this BSON Binary object.
294
295        Decodes this binary object as a native :class:`uuid.UUID` instance
296        with the provided ``uuid_representation``.
297
298        Raises :exc:`ValueError` if this :class:`~bson.binary.Binary` instance
299        does not contain a UUID.
300
301        :Parameters:
302          - `uuid_representation`: A member of
303            :class:`~bson.binary.UuidRepresentation`. Default:
304            :const:`~bson.binary.UuidRepresentation.STANDARD`.
305            See :ref:`handling-uuid-data-example` for details.
306
307        .. versionadded:: 3.11
308        """
309        if self.subtype not in ALL_UUID_SUBTYPES:
310            raise ValueError("cannot decode subtype %s as a uuid" % (self.subtype,))
311
312        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
313            raise ValueError(
314                "uuid_representation must be a value from .binary.UuidRepresentation"
315            )
316
317        if uuid_representation == UuidRepresentation.UNSPECIFIED:
318            raise ValueError("uuid_representation cannot be UNSPECIFIED")
319        elif uuid_representation == UuidRepresentation.PYTHON_LEGACY:
320            if self.subtype == OLD_UUID_SUBTYPE:
321                return UUID(bytes=self)
322        elif uuid_representation == UuidRepresentation.JAVA_LEGACY:
323            if self.subtype == OLD_UUID_SUBTYPE:
324                return UUID(bytes=self[0:8][::-1] + self[8:16][::-1])
325        elif uuid_representation == UuidRepresentation.CSHARP_LEGACY:
326            if self.subtype == OLD_UUID_SUBTYPE:
327                return UUID(bytes_le=self)
328        else:
329            # uuid_representation == UuidRepresentation.STANDARD
330            if self.subtype == UUID_SUBTYPE:
331                return UUID(bytes=self)
332
333        raise ValueError(
334            "cannot decode subtype %s to %s"
335            % (self.subtype, UUID_REPRESENTATION_NAMES[uuid_representation])
336        )

Create a Python UUID from this BSON Binary object.

Decodes this binary object as a native uuid.UUID instance with the provided uuid_representation.

Raises ValueError if this ~bson.binary.Binary instance does not contain a UUID.

:Parameters:

  • uuid_representation: A member of ~bson.binary.UuidRepresentation. Default: ~bson.binary.UuidRepresentation.STANDARD. See :ref:handling-uuid-data-example for details.

New in version 3.11.

subtype: int

Subtype of this binary data.

Inherited Members
builtins.bytes
capitalize
center
count
decode
endswith
expandtabs
find
fromhex
hex
index
isalnum
isalpha
isascii
isdigit
islower
isspace
istitle
isupper
join
ljust
lower
lstrip
maketrans
partition
replace
removeprefix
removesuffix
rfind
rindex
rjust
rpartition
rsplit
rstrip
split
splitlines
startswith
strip
swapcase
title
translate
upper
zfill
class UuidRepresentation:
 66class UuidRepresentation:
 67    UNSPECIFIED = 0
 68    """An unspecified UUID representation.
 69
 70    When configured, :class:`uuid.UUID` instances will **not** be
 71    automatically encoded to or decoded from :class:`~bson.binary.Binary`.
 72    When encoding a :class:`uuid.UUID` instance, an error will be raised.
 73    To encode a :class:`uuid.UUID` instance with this configuration, it must
 74    be wrapped in the :class:`~bson.binary.Binary` class by the application
 75    code. When decoding a BSON binary field with a UUID subtype, a
 76    :class:`~bson.binary.Binary` instance will be returned instead of a
 77    :class:`uuid.UUID` instance.
 78
 79    See :ref:`unspecified-representation-details` for details.
 80
 81    .. versionadded:: 3.11
 82    """
 83
 84    STANDARD = UUID_SUBTYPE
 85    """The standard UUID representation.
 86
 87    :class:`uuid.UUID` instances will automatically be encoded to
 88    and decoded from . binary, using RFC-4122 byte order with
 89    binary subtype :data:`UUID_SUBTYPE`.
 90
 91    See :ref:`standard-representation-details` for details.
 92
 93    .. versionadded:: 3.11
 94    """
 95
 96    PYTHON_LEGACY = OLD_UUID_SUBTYPE
 97    """The Python legacy UUID representation.
 98
 99    :class:`uuid.UUID` instances will automatically be encoded to
100    and decoded from . binary, using RFC-4122 byte order with
101    binary subtype :data:`OLD_UUID_SUBTYPE`.
102
103    See :ref:`python-legacy-representation-details` for details.
104
105    .. versionadded:: 3.11
106    """
107
108    JAVA_LEGACY = 5
109    """The Java legacy UUID representation.
110
111    :class:`uuid.UUID` instances will automatically be encoded to
112    and decoded from . binary subtype :data:`OLD_UUID_SUBTYPE`,
113    using the Java driver's legacy byte order.
114
115    See :ref:`java-legacy-representation-details` for details.
116
117    .. versionadded:: 3.11
118    """
119
120    CSHARP_LEGACY = 6
121    """The C#/.net legacy UUID representation.
122
123    :class:`uuid.UUID` instances will automatically be encoded to
124    and decoded from . binary subtype :data:`OLD_UUID_SUBTYPE`,
125    using the C# driver's legacy byte order.
126
127    See :ref:`csharp-legacy-representation-details` for details.
128
129    .. versionadded:: 3.11
130    """
UNSPECIFIED = 0

An unspecified UUID representation.

When configured, uuid.UUID instances will not be automatically encoded to or decoded from ~bson.binary.Binary. When encoding a uuid.UUID instance, an error will be raised. To encode a uuid.UUID instance with this configuration, it must be wrapped in the ~bson.binary.Binary class by the application code. When decoding a BSON binary field with a UUID subtype, a ~bson.binary.Binary instance will be returned instead of a uuid.UUID instance.

See :ref:unspecified-representation-details for details.

New in version 3.11.

STANDARD = 4

The standard UUID representation.

uuid.UUID instances will automatically be encoded to and decoded from BSON binary, using RFC-4122 byte order with binary subtype UUID_SUBTYPE.

See :ref:standard-representation-details for details.

New in version 3.11.

PYTHON_LEGACY = 3

The Python legacy UUID representation.

uuid.UUID instances will automatically be encoded to and decoded from BSON binary, using RFC-4122 byte order with binary subtype OLD_UUID_SUBTYPE.

See :ref:python-legacy-representation-details for details.

New in version 3.11.

JAVA_LEGACY = 5

The Java legacy UUID representation.

uuid.UUID instances will automatically be encoded to and decoded from BSON binary subtype OLD_UUID_SUBTYPE, using the Java driver's legacy byte order.

See :ref:java-legacy-representation-details for details.

New in version 3.11.

CSHARP_LEGACY = 6

The C#/.net legacy UUID representation.

uuid.UUID instances will automatically be encoded to and decoded from BSON binary subtype OLD_UUID_SUBTYPE, using the C# driver's legacy byte order.

See :ref:csharp-legacy-representation-details for details.

New in version 3.11.

class Code(builtins.str):
 23class Code(str):
 24    """BSON's JavaScript code type.
 25
 26    Raises :class:`TypeError` if `code` is not an instance of
 27    :class:`basestring` (:class:`str` in python 3) or `scope`
 28    is not ``None`` or an instance of :class:`dict`.
 29
 30    Scope variables can be set by passing a dictionary as the `scope`
 31    argument or by using keyword arguments. If a variable is set as a
 32    keyword argument it will override any setting for that variable in
 33    the `scope` dictionary.
 34
 35    :Parameters:
 36      - `code`: A string containing JavaScript code to be evaluated or another
 37        instance of Code. In the latter case, the scope of `code` becomes this
 38        Code's :attr:`scope`.
 39      - `scope` (optional): dictionary representing the scope in which
 40        `code` should be evaluated - a mapping from identifiers (as
 41        strings) to values. Defaults to ``None``. This is applied after any
 42        scope associated with a given `code` above.
 43      - `**kwargs` (optional): scope variables can also be passed as
 44        keyword arguments. These are applied after `scope` and `code`.
 45
 46    .. versionchanged:: 3.4
 47      The default value for :attr:`scope` is ``None`` instead of ``{}``.
 48
 49    """
 50
 51    _type_marker = 13
 52    __scope: Union[Mapping[str, Any], None]
 53
 54    def __new__(
 55        cls: Type["Code"],
 56        code: Union[str, "Code"],
 57        scope: Optional[Mapping[str, Any]] = None,
 58        **kwargs: Any
 59    ) -> "Code":
 60        if not isinstance(code, str):
 61            raise TypeError("code must be an instance of str")
 62
 63        self = str.__new__(cls, code)
 64
 65        try:
 66            self.__scope = code.scope  # type: ignore
 67        except AttributeError:
 68            self.__scope = None
 69
 70        if scope is not None:
 71            if not isinstance(scope, _Mapping):
 72                raise TypeError("scope must be an instance of dict")
 73            if self.__scope is not None:
 74                self.__scope.update(scope)  # type: ignore
 75            else:
 76                self.__scope = scope
 77
 78        if kwargs:
 79            if self.__scope is not None:
 80                self.__scope.update(kwargs)  # type: ignore
 81            else:
 82                self.__scope = kwargs
 83
 84        return self
 85
 86    @property
 87    def scope(self) -> Optional[Mapping[str, Any]]:
 88        """Scope dictionary for this instance or ``None``."""
 89        return self.__scope
 90
 91    def __repr__(self):
 92        return "Code(%s, %r)" % (str.__repr__(self), self.__scope)
 93
 94    def __eq__(self, other: Any) -> bool:
 95        if isinstance(other, Code):
 96            return (self.__scope, str(self)) == (other.__scope, str(other))
 97        return False
 98
 99    __hash__: Any = None
100
101    def __ne__(self, other: Any) -> bool:
102        return not self == other

BSON's JavaScript code type.

Raises TypeError if code is not an instance of basestring (str in python 3) or scope is not None or an instance of dict.

Scope variables can be set by passing a dictionary as the scope argument or by using keyword arguments. If a variable is set as a keyword argument it will override any setting for that variable in the scope dictionary.

:Parameters:

  • code: A string containing JavaScript code to be evaluated or another instance of Code. In the latter case, the scope of code becomes this Code's scope.
  • scope (optional): dictionary representing the scope in which code should be evaluated - a mapping from identifiers (as strings) to values. Defaults to None. This is applied after any scope associated with a given code above.
  • **kwargs (optional): scope variables can also be passed as keyword arguments. These are applied after scope and code.

Changed in version 3.4: The default value for scope is None instead of {}.

scope: Optional[Mapping[str, Any]]

Scope dictionary for this instance or None.

Inherited Members
builtins.str
encode
replace
split
rsplit
join
capitalize
casefold
title
center
count
expandtabs
find
partition
index
ljust
lower
lstrip
rfind
rindex
rjust
rstrip
rpartition
splitlines
strip
swapcase
translate
upper
startswith
endswith
removeprefix
removesuffix
isascii
islower
isupper
istitle
isspace
isdecimal
isdigit
isnumeric
isalpha
isalnum
isidentifier
isprintable
zfill
format
format_map
maketrans
DEFAULT_CODEC_OPTIONS = CodecOptions(document_class=dict, tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)
class CodecOptions(typing.NamedTuple):
class CodecOptions(_BaseCodecOptions):
    """Encapsulates options used when encoding and/or decoding BSON.

    The `document_class` option is used to define a custom type for use
    decoding BSON documents. Access to the underlying raw BSON bytes for
    a document is available using the :class:`~bson.raw_bson.RawBSONDocument`
    type::

      >>> from .raw_bson import RawBSONDocument
      >>> from .codec_options import CodecOptions
      >>> codec_options = CodecOptions(document_class=RawBSONDocument)
      >>> coll = db.get_collection('test', codec_options=codec_options)
      >>> doc = coll.find_one()
      >>> doc.raw
      '\\x16\\x00\\x00\\x00\\x07_id\\x00[0\\x165\\x91\\x10\\xea\\x14\\xe8\\xc5\\x8b\\x93\\x00'

    The document class can be any type that inherits from
    :class:`~collections.abc.MutableMapping`::

      >>> class AttributeDict(dict):
      ...     # A dict that supports attribute access.
      ...     def __getattr__(self, key):
      ...         return self[key]
      ...     def __setattr__(self, key, value):
      ...         self[key] = value
      ...
      >>> codec_options = CodecOptions(document_class=AttributeDict)
      >>> coll = db.get_collection('test', codec_options=codec_options)
      >>> doc = coll.find_one()
      >>> doc._id
      ObjectId('5b3016359110ea14e8c58b93')

    See :doc:`/examples/datetimes` for examples using the `tz_aware` and
    `tzinfo` options.

    See :doc:`/examples/uuid` for examples using the `uuid_representation`
    option.

    :Parameters:
      - `document_class`: BSON documents returned in queries will be decoded
        to an instance of this class. Must be a subclass of
        :class:`~collections.abc.MutableMapping`. Defaults to :class:`dict`.
      - `tz_aware`: If ``True``, BSON datetimes will be decoded to timezone
        aware instances of :class:`~datetime.datetime`. Otherwise they will be
        naive. Defaults to ``False``.
      - `uuid_representation`: The BSON representation to use when encoding
        and decoding instances of :class:`~uuid.UUID`. Defaults to
        :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New
        applications should consider setting this to
        :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language
        compatibility. See :ref:`handling-uuid-data-example` for details.
      - `unicode_decode_error_handler`: The error handler to apply when
        a Unicode-related error occurs during BSON decoding that would
        otherwise raise :exc:`UnicodeDecodeError`. Valid options include
        'strict', 'replace', 'backslashreplace', 'surrogateescape', and
        'ignore'. Defaults to 'strict'.
      - `tzinfo`: A :class:`~datetime.tzinfo` subclass that specifies the
        timezone to/from which :class:`~datetime.datetime` objects should be
        encoded/decoded.
      - `type_registry`: Instance of :class:`TypeRegistry` used to customize
        encoding and decoding behavior.
      - `datetime_conversion`: Specifies how UTC datetimes should be decoded
        within BSON. Valid options include 'datetime_ms' to return as a
        DatetimeMS, 'datetime' to return as a datetime.datetime and
        raising a ValueError for out-of-range values, 'datetime_auto' to
        return DatetimeMS objects when the underlying datetime is
        out-of-range and 'datetime_clamp' to clamp to the minimum and
        maximum possible datetimes. Defaults to 'datetime'.

    .. versionchanged:: 4.0
       The default for `uuid_representation` was changed from
       :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to
       :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`.

    .. versionadded:: 3.8
       `type_registry` attribute.

    .. warning:: Care must be taken when changing
       `unicode_decode_error_handler` from its default value ('strict').
       The 'replace' and 'ignore' modes should not be used when documents
       retrieved from the server will be modified in the client application
       and stored back to the server.
    """

    def __new__(
        cls: Type["CodecOptions"],
        document_class: Optional[Type[Mapping[str, Any]]] = None,
        tz_aware: bool = False,
        uuid_representation: Optional[int] = UuidRepresentation.UNSPECIFIED,
        unicode_decode_error_handler: str = "strict",
        tzinfo: Optional[datetime.tzinfo] = None,
        type_registry: Optional[TypeRegistry] = None,
        datetime_conversion: Optional[DatetimeConversion] = DatetimeConversion.DATETIME,
    ) -> "CodecOptions":
        """Validate the options and build the underlying tuple.

        Raises :exc:`TypeError` for an unsupported `document_class`, a
        non-bool `tz_aware`, a `tzinfo` that is not a
        :class:`datetime.tzinfo`, or a `type_registry` that is not a
        :class:`TypeRegistry`. Raises :exc:`ValueError` for an unknown
        `uuid_representation`, a non-string `unicode_decode_error_handler`,
        or a `tzinfo` given without ``tz_aware=True``.
        """
        doc_class = document_class or dict
        # issubclass can raise TypeError for generic aliases like SON[str, Any].
        # In that case we can use the base class for the comparison.
        is_mapping = False
        try:
            is_mapping = issubclass(doc_class, _MutableMapping)
        except TypeError:
            if hasattr(doc_class, "__origin__"):
                is_mapping = issubclass(doc_class.__origin__, _MutableMapping)  # type: ignore[union-attr]
        if not (is_mapping or _raw_document_class(doc_class)):
            raise TypeError(
                "document_class must be dict, bson.son.SON, "
                "bson.raw_bson.RawBSONDocument, or a "
                "subclass of collections.abc.MutableMapping"
            )
        if not isinstance(tz_aware, bool):
            raise TypeError("tz_aware must be True or False")
        if uuid_representation not in ALL_UUID_REPRESENTATIONS:
            raise ValueError(
                "uuid_representation must be a value from bson.binary.UuidRepresentation"
            )
        if not isinstance(unicode_decode_error_handler, str):
            raise ValueError("unicode_decode_error_handler must be a string")
        if tzinfo is not None:
            if not isinstance(tzinfo, datetime.tzinfo):
                raise TypeError("tzinfo must be an instance of datetime.tzinfo")
            if not tz_aware:
                raise ValueError("cannot specify tzinfo without also setting tz_aware=True")

        # A missing (or falsy) registry is replaced by a default-constructed one.
        type_registry = type_registry or TypeRegistry()

        if not isinstance(type_registry, TypeRegistry):
            raise TypeError("type_registry must be an instance of TypeRegistry")

        # Field order here must match the keys listed in _options_dict().
        return tuple.__new__(
            cls,
            (
                doc_class,
                tz_aware,
                uuid_representation,
                unicode_decode_error_handler,
                tzinfo,
                type_registry,
                datetime_conversion,
            ),
        )

    def _arguments_repr(self) -> str:
        """Representation of the arguments used to create this object."""
        document_class_repr = "dict" if self.document_class is dict else repr(self.document_class)

        # Show the symbolic name when one is known, else the raw value.
        uuid_rep_repr = UUID_REPRESENTATION_NAMES.get(
            self.uuid_representation, self.uuid_representation
        )

        return (
            "document_class=%s, tz_aware=%r, uuid_representation=%s, "
            "unicode_decode_error_handler=%r, tzinfo=%r, "
            "type_registry=%r, datetime_conversion=%s"
            % (
                document_class_repr,
                self.tz_aware,
                uuid_rep_repr,
                self.unicode_decode_error_handler,
                self.tzinfo,
                self.type_registry,
                self.datetime_conversion,
            )
        )

    def _options_dict(self) -> Dict[str, Any]:
        """Dictionary of the arguments used to create this object."""
        # TODO: PYTHON-2442 use _asdict() instead
        return {
            "document_class": self.document_class,
            "tz_aware": self.tz_aware,
            "uuid_representation": self.uuid_representation,
            "unicode_decode_error_handler": self.unicode_decode_error_handler,
            "tzinfo": self.tzinfo,
            "type_registry": self.type_registry,
            "datetime_conversion": self.datetime_conversion,
        }

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, self._arguments_repr())

    def with_options(self, **kwargs: Any) -> "CodecOptions":
        """Make a copy of this CodecOptions, overriding some options::

            >>> from .codec_options import DEFAULT_CODEC_OPTIONS
            >>> DEFAULT_CODEC_OPTIONS.tz_aware
            False
            >>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
            >>> options.tz_aware
            True

        .. versionadded:: 3.5
        """
        opts = self._options_dict()
        opts.update(kwargs)
        return CodecOptions(**opts)

Encapsulates options used when encoding and/or decoding BSON.

The document_class option is used to define a custom type for use decoding BSON documents. Access to the underlying raw BSON bytes for a document is available using the ~bson.raw_bson.RawBSONDocument type::

>>> from .raw_bson import RawBSONDocument
>>> from .codec_options import CodecOptions
>>> codec_options = CodecOptions(document_class=RawBSONDocument)
>>> coll = db.get_collection('test', codec_options=codec_options)
>>> doc = coll.find_one()
>>> doc.raw
'\x16\x00\x00\x00\x07_id\x00[0\x165\x91\x10\xea\x14\xe8\xc5\x8b\x93\x00'

The document class can be any type that inherits from ~collections.abc.MutableMapping::

>>> class AttributeDict(dict):
...     # A dict that supports attribute access.
...     def __getattr__(self, key):
...         return self[key]
...     def __setattr__(self, key, value):
...         self[key] = value
...
>>> codec_options = CodecOptions(document_class=AttributeDict)
>>> coll = db.get_collection('test', codec_options=codec_options)
>>> doc = coll.find_one()
>>> doc._id
ObjectId('5b3016359110ea14e8c58b93')

See :doc:/examples/datetimes for examples using the tz_aware and tzinfo options.

See :doc:/examples/uuid for examples using the uuid_representation option.

:Parameters:

  • document_class: BSON documents returned in queries will be decoded to an instance of this class. Must be a subclass of ~collections.abc.MutableMapping. Defaults to dict.
  • tz_aware: If True, BSON datetimes will be decoded to timezone aware instances of ~datetime.datetime. Otherwise they will be naive. Defaults to False.
  • uuid_representation: The BSON representation to use when encoding and decoding instances of ~uuid.UUID. Defaults to ~bson.binary.UuidRepresentation.UNSPECIFIED. New applications should consider setting this to ~bson.binary.UuidRepresentation.STANDARD for cross language compatibility. See :ref:handling-uuid-data-example for details.
  • unicode_decode_error_handler: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise UnicodeDecodeError. Valid options include 'strict', 'replace', 'backslashreplace', 'surrogateescape', and 'ignore'. Defaults to 'strict'.
  • tzinfo: A ~datetime.tzinfo subclass that specifies the timezone to/from which ~datetime.datetime objects should be encoded/decoded.
  • type_registry: Instance of TypeRegistry used to customize encoding and decoding behavior.
  • datetime_conversion: Specifies how UTC datetimes should be decoded within BSON. Valid options include 'datetime_ms' to return as a DatetimeMS, 'datetime' to return as a datetime.datetime and raising a ValueError for out-of-range values, 'datetime_auto' to return DatetimeMS objects when the underlying datetime is out-of-range and 'datetime_clamp' to clamp to the minimum and maximum possible datetimes. Defaults to 'datetime'.

Changed in version 4.0: The default for uuid_representation was changed from ~bson.binary.UuidRepresentation.PYTHON_LEGACY to ~bson.binary.UuidRepresentation.UNSPECIFIED.

New in version 3.8: type_registry attribute.

Warning: Care must be taken when changing unicode_decode_error_handler from its default value ('strict'). The 'replace' and 'ignore' modes should not be used when documents retrieved from the server will be modified in the client application and stored back to the server.

def with_options(self, **kwargs: Any) -> xtquant.xtbson.bson37.CodecOptions:
427    def with_options(self, **kwargs: Any) -> "CodecOptions":
428        """Make a copy of this CodecOptions, overriding some options::
429
430            >>> from .codec_options import DEFAULT_CODEC_OPTIONS
431            >>> DEFAULT_CODEC_OPTIONS.tz_aware
432            False
433            >>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
434            >>> options.tz_aware
435            True
436
437        .. versionadded:: 3.5
438        """
439        opts = self._options_dict()
440        opts.update(kwargs)
441        return CodecOptions(**opts)

Make a copy of this CodecOptions, overriding some options::

>>> from .codec_options import DEFAULT_CODEC_OPTIONS
>>> DEFAULT_CODEC_OPTIONS.tz_aware
False
>>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True)
>>> options.tz_aware
True

New in version 3.5.

Inherited Members
xtquant.xtbson.bson37.codec_options._BaseCodecOptions
document_class
tz_aware
uuid_representation
unicode_decode_error_handler
tzinfo
type_registry
datetime_conversion
builtins.tuple
index
count
class DBRef:
 class DBRef(object):
     """A reference to a document stored in MongoDB.

     A DBRef stores a collection name, a document ``_id``, an optional
     database name and any number of extra custom fields. The extra fields
     are readable as attributes of the instance.
     """

     __slots__ = "__collection", "__id", "__database", "__kwargs"
     __getstate__ = _getstate_slots
     __setstate__ = _setstate_slots
     # DBRef isn't actually a BSON "type" so this number was arbitrarily chosen.
     _type_marker = 100

     def __init__(
         self,
         collection: str,
         id: Any,
         database: Optional[str] = None,
         _extra: Optional[Mapping[str, Any]] = None,
         **kwargs: Any
     ) -> None:
         """Initialize a new :class:`DBRef`.

         Raises :class:`TypeError` if `collection` or `database` is not
         an instance of :class:`str`.
         `database` is optional and allows references to documents to work
         across databases. Any additional keyword arguments will create
         additional fields in the resultant embedded document.

         :Parameters:
           - `collection`: name of the collection the document is stored in
           - `id`: the value of the document's ``"_id"`` field
           - `database` (optional): name of the database to reference
           - `_extra` (optional): mapping of additional fields; merged into
             `**kwargs`, overriding keys that appear in both
           - `**kwargs` (optional): additional keyword arguments will
             create additional, custom fields

         .. seealso:: The MongoDB documentation on `dbrefs <https://dochub.mongodb.org/core/dbrefs>`_.
         """
         if not isinstance(collection, str):
             raise TypeError("collection must be an instance of str")
         if database is not None and not isinstance(database, str):
             raise TypeError("database must be an instance of str")

         self.__collection = collection
         self.__id = id
         self.__database = database
         kwargs.update(_extra or {})
         self.__kwargs = kwargs

     @property
     def collection(self) -> str:
         """Get the name of this DBRef's collection."""
         return self.__collection

     @property
     def id(self) -> Any:
         """Get this DBRef's _id."""
         return self.__id

     @property
     def database(self) -> Optional[str]:
         """Get the name of this DBRef's database.

         Returns None if this DBRef doesn't specify a database.
         """
         return self.__database

     def __getattr__(self, key: Any) -> Any:
         """Expose the extra custom fields as attributes.

         Python only calls this when normal attribute lookup has failed.
         Raises :class:`AttributeError` if `key` is not a custom field.
         """
         # When the ``__kwargs`` slot itself is unset (an instance created
         # with ``__new__`` before ``__init__``/``__setstate__`` ran), reading
         # ``self.__kwargs`` below would re-enter this method without end.
         if key == "_DBRef__kwargs":
             raise AttributeError(key)
         try:
             return self.__kwargs[key]
         except KeyError:
             # The KeyError is an implementation detail; do not chain it.
             raise AttributeError(key) from None

     def as_doc(self) -> SON[str, Any]:
         """Get the SON document representation of this DBRef.

         The document holds ``"$ref"`` and ``"$id"``, then ``"$db"`` when a
         database is set, followed by the custom fields.

         Generally not needed by application developers
         """
         doc = SON([("$ref", self.collection), ("$id", self.id)])
         if self.database is not None:
             doc["$db"] = self.database
         doc.update(self.__kwargs)
         return doc

     def __repr__(self) -> str:
         extra = "".join([", %s=%r" % (k, v) for k, v in self.__kwargs.items()])
         if self.database is None:
             return "DBRef(%r, %r%s)" % (self.collection, self.id, extra)
         return "DBRef(%r, %r, %r%s)" % (self.collection, self.id, self.database, extra)

     def __eq__(self, other: Any) -> bool:
         if isinstance(other, DBRef):
             us = (self.__database, self.__collection, self.__id, self.__kwargs)
             them = (other.__database, other.__collection, other.__id, other.__kwargs)
             return us == them
         return NotImplemented

     def __ne__(self, other: Any) -> bool:
         return not self == other

     def __hash__(self) -> int:
         """Get a hash value for this :class:`DBRef`."""
         # The custom fields are sorted so the hash does not depend on the
         # order in which they were supplied.
         return hash(
             (self.__collection, self.__id, self.__database, tuple(sorted(self.__kwargs.items())))
         )

     def __deepcopy__(self, memo: Any) -> "DBRef":
         """Support function for `copy.deepcopy()`."""
         # The copied custom fields are passed positionally as ``_extra``.
         return DBRef(
             deepcopy(self.__collection, memo),
             deepcopy(self.__id, memo),
             deepcopy(self.__database, memo),
             deepcopy(self.__kwargs, memo),
         )

A reference to a document stored in MongoDB.

DBRef( collection: str, id: Any, database: Optional[str] = None, _extra: Optional[Mapping[str, Any]] = None, **kwargs: Any)
34    def __init__(
35        self,
36        collection: str,
37        id: Any,
38        database: Optional[str] = None,
39        _extra: Optional[Mapping[str, Any]] = None,
40        **kwargs: Any
41    ) -> None:
42        """Initialize a new :class:`DBRef`.
43
44        Raises :class:`TypeError` if `collection` or `database` is not
45        an instance of :class:`basestring` (:class:`str` in python 3).
46        `database` is optional and allows references to documents to work
47        across databases. Any additional keyword arguments will create
48        additional fields in the resultant embedded document.
49
50        :Parameters:
51          - `collection`: name of the collection the document is stored in
52          - `id`: the value of the document's ``"_id"`` field
53          - `database` (optional): name of the database to reference
54          - `**kwargs` (optional): additional keyword arguments will
55            create additional, custom fields
56
57        .. seealso:: The MongoDB documentation on `dbrefs <https://dochub.mongodb.org/core/dbrefs>`_.
58        """
59        if not isinstance(collection, str):
60            raise TypeError("collection must be an instance of str")
61        if database is not None and not isinstance(database, str):
62            raise TypeError("database must be an instance of str")
63
64        self.__collection = collection
65        self.__id = id
66        self.__database = database
67        kwargs.update(_extra or {})
68        self.__kwargs = kwargs

Initialize a new DBRef.

Raises TypeError if collection or database is not an instance of basestring (str in python 3). database is optional and allows references to documents to work across databases. Any additional keyword arguments will create additional fields in the resultant embedded document.

:Parameters:

  • collection: name of the collection the document is stored in
  • id: the value of the document's "_id" field
  • database (optional): name of the database to reference
  • **kwargs (optional): additional keyword arguments will create additional, custom fields

See also: the MongoDB documentation on dbrefs (https://dochub.mongodb.org/core/dbrefs).

collection: str

Get the name of this DBRef's collection.

id: Any

Get this DBRef's _id.

database: Optional[str]

Get the name of this DBRef's database.

Returns None if this DBRef doesn't specify a database.

def as_doc(self) -> xtquant.xtbson.bson37.SON[str, typing.Any]:
 def as_doc(self) -> SON[str, Any]:
     """Return this DBRef as a SON document.

     The result starts with ``"$ref"`` and ``"$id"``, adds ``"$db"`` when
     a database is set, and then the custom fields.

     Generally not needed by application developers
     """
     document = SON([("$ref", self.collection), ("$id", self.id)])
     db_name = self.database
     if db_name is not None:
         document["$db"] = db_name
     document.update(self.__kwargs)
     return document

Get the SON document representation of this DBRef.

Generally not needed by application developers

class Decimal128:
class Decimal128(object):
    """BSON Decimal128 type::

      >>> Decimal128(Decimal("0.0005"))
      Decimal128('0.0005')
      >>> Decimal128("0.0005")
      Decimal128('0.0005')
      >>> Decimal128((3474527112516337664, 5))
      Decimal128('0.0005')

    :Parameters:
      - `value`: An instance of :class:`decimal.Decimal`, string, or tuple of
        (high bits, low bits) from Binary Integer Decimal (BID) format.

    .. note:: :class:`~Decimal128` uses an instance of :class:`decimal.Context`
      configured for IEEE-754 Decimal128 when validating parameters.
      Signals like :class:`decimal.InvalidOperation`, :class:`decimal.Inexact`,
      and :class:`decimal.Overflow` are trapped and raised as exceptions::

        >>> Decimal128(".13.1")
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
          ...
        decimal.InvalidOperation: [<class 'decimal.ConversionSyntax'>]
        >>>
        >>> Decimal128("1E-6177")
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
          ...
        decimal.Inexact: [<class 'decimal.Inexact'>]
        >>>
        >>> Decimal128("1E6145")
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
          ...
        decimal.Overflow: [<class 'decimal.Overflow'>, <class 'decimal.Rounded'>]

      To ensure the result of a calculation can always be stored as BSON
      Decimal128 use the context returned by
      :func:`create_decimal128_context`::

        >>> import decimal
        >>> decimal128_ctx = create_decimal128_context()
        >>> with decimal.localcontext(decimal128_ctx) as ctx:
        ...     Decimal128(ctx.create_decimal(".13.3"))
        ...
        Decimal128('NaN')
        >>>
        >>> with decimal.localcontext(decimal128_ctx) as ctx:
        ...     Decimal128(ctx.create_decimal("1E-6177"))
        ...
        Decimal128('0E-6176')
        >>>
        >>> with decimal.localcontext(decimal128_ctx) as ctx:
        ...     Decimal128(ctx.create_decimal("1E6145"))
        ...
        Decimal128('Infinity')

      To match the behavior of MongoDB's Decimal128 implementation
      str(Decimal(value)) may not match str(Decimal128(value)) for NaN values::

        >>> Decimal128(Decimal('NaN'))
        Decimal128('NaN')
        >>> Decimal128(Decimal('-NaN'))
        Decimal128('NaN')
        >>> Decimal128(Decimal('sNaN'))
        Decimal128('NaN')
        >>> Decimal128(Decimal('-sNaN'))
        Decimal128('NaN')

      However, :meth:`~Decimal128.to_decimal` will return the exact value::

        >>> Decimal128(Decimal('NaN')).to_decimal()
        Decimal('NaN')
        >>> Decimal128(Decimal('-NaN')).to_decimal()
        Decimal('-NaN')
        >>> Decimal128(Decimal('sNaN')).to_decimal()
        Decimal('sNaN')
        >>> Decimal128(Decimal('-sNaN')).to_decimal()
        Decimal('-sNaN')

      Two instances of :class:`Decimal128` compare equal if their Binary
      Integer Decimal encodings are equal, and equal instances hash equal::

        >>> Decimal128('NaN') == Decimal128('NaN')
        True
        >>> Decimal128('NaN').bid == Decimal128('NaN').bid
        True

      This differs from :class:`decimal.Decimal` comparisons for NaN::

        >>> Decimal('NaN') == Decimal('NaN')
        False
    """

    __slots__ = ("__high", "__low")

    _type_marker = 19

    def __init__(self, value: _VALUE_OPTIONS) -> None:
        """Store `value` as a (high, low) pair of 64-bit words.

        Raises :class:`ValueError` for a list or tuple that does not have
        exactly two elements and :class:`TypeError` for any other
        unsupported type.
        """
        if isinstance(value, (str, decimal.Decimal)):
            self.__high, self.__low = _decimal_to_128(value)
        elif isinstance(value, (list, tuple)):
            if len(value) != 2:
                raise ValueError(
                    "Invalid size for creation of Decimal128 "
                    "from list or tuple. Must have exactly 2 "
                    "elements."
                )
            # The pair is taken as-is: (high bits, low bits).
            self.__high, self.__low = value  # type: ignore
        else:
            raise TypeError("Cannot convert %r to Decimal128" % (value,))

    def to_decimal(self) -> decimal.Decimal:
        """Returns an instance of :class:`decimal.Decimal` for this
        :class:`Decimal128`.
        """
        high = self.__high
        low = self.__low
        sign = 1 if (high & _SIGN) else 0

        # Special values carry no digits: signaling NaN, quiet NaN, infinity.
        if (high & _SNAN) == _SNAN:
            return decimal.Decimal((sign, (), "N"))  # type: ignore
        elif (high & _NAN) == _NAN:
            return decimal.Decimal((sign, (), "n"))  # type: ignore
        elif (high & _INF) == _INF:
            return decimal.Decimal((sign, (), "F"))  # type: ignore

        if (high & _EXPONENT_MASK) == _EXPONENT_MASK:
            # Alternate exponent position; the result uses a zero coefficient.
            exponent = ((high & 0x1FFFE00000000000) >> 47) - _EXPONENT_BIAS
            return decimal.Decimal((sign, (0,), exponent))
        exponent = ((high & 0x7FFF800000000000) >> 49) - _EXPONENT_BIAS

        # Coefficient: the low 49 bits of the high word followed by the
        # 64 bits of the low word.
        coefficient = ((high & 0x0001FFFFFFFFFFFF) << 64) | (low & 0xFFFFFFFFFFFFFFFF)

        # cdecimal only accepts a tuple for digits.
        digits = tuple(int(digit) for digit in str(coefficient))

        with decimal.localcontext(_DEC128_CTX) as ctx:
            return ctx.create_decimal((sign, digits, exponent))

    @classmethod
    def from_bid(cls: Type["Decimal128"], value: bytes) -> "Decimal128":
        """Create an instance of :class:`Decimal128` from Binary Integer
        Decimal string.

        Raises :class:`TypeError` if `value` is not :class:`bytes` and
        :class:`ValueError` if it is not exactly 16 bytes long.

        :Parameters:
          - `value`: 16 byte string (128-bit IEEE 754-2008 decimal floating
            point in Binary Integer Decimal (BID) format).
        """
        if not isinstance(value, bytes):
            raise TypeError("value must be an instance of bytes")
        if len(value) != 16:
            raise ValueError("value must be exactly 16 bytes")
        # The low word comes first in the encoding, the high word second.
        return cls((_UNPACK_64(value[8:])[0], _UNPACK_64(value[:8])[0]))  # type: ignore

    @property
    def bid(self) -> bytes:
        """The Binary Integer Decimal (BID) encoding of this instance."""
        return _PACK_64(self.__low) + _PACK_64(self.__high)

    def __str__(self) -> str:
        dec = self.to_decimal()
        if dec.is_nan():
            # Required by the drivers spec to match MongoDB behavior.
            return "NaN"
        return str(dec)

    def __repr__(self) -> str:
        return "Decimal128('%s')" % (str(self),)

    def __setstate__(self, value: Tuple[int, int]) -> None:
        self.__high, self.__low = value

    def __getstate__(self) -> Tuple[int, int]:
        return self.__high, self.__low

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Decimal128):
            return self.bid == other.bid
        return NotImplemented

    def __ne__(self, other: Any) -> bool:
        return not self == other

    def __hash__(self) -> int:
        """Hash the BID encoding so equal instances hash equal.

        Defining ``__eq__`` alone would leave the class unhashable.
        """
        return hash(self.bid)

BSON Decimal128 type::

>>> Decimal128(Decimal("0.0005"))
Decimal128('0.0005')
>>> Decimal128("0.0005")
Decimal128('0.0005')
>>> Decimal128((3474527112516337664, 5))
Decimal128('0.0005')

:Parameters:

  • value: An instance of decimal.Decimal, string, or tuple of (high bits, low bits) from Binary Integer Decimal (BID) format.
Note: Decimal128 uses an instance of decimal.Context configured for IEEE-754 Decimal128 when validating parameters. Signals like decimal.InvalidOperation, decimal.Inexact, and decimal.Overflow are trapped and raised as exceptions::

>>> Decimal128(".13.1")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  ...
decimal.InvalidOperation: [<class 'decimal.ConversionSyntax'>]
>>>
>>> Decimal128("1E-6177")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  ...
decimal.Inexact: [<class 'decimal.Inexact'>]
>>>
>>> Decimal128("1E6145")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  ...
decimal.Overflow: [<class 'decimal.Overflow'>, <class 'decimal.Rounded'>]

To ensure the result of a calculation can always be stored as BSON Decimal128 use the context returned by create_decimal128_context()::

>>> import decimal
>>> decimal128_ctx = create_decimal128_context()
>>> with decimal.localcontext(decimal128_ctx) as ctx:
...     Decimal128(ctx.create_decimal(".13.3"))
...
Decimal128('NaN')
>>>
>>> with decimal.localcontext(decimal128_ctx) as ctx:
...     Decimal128(ctx.create_decimal("1E-6177"))
...
Decimal128('0E-6176')
>>>
>>> with decimal.localcontext(decimal128_ctx) as ctx:
...     Decimal128(ctx.create_decimal("1E6145"))
...
Decimal128('Infinity')

To match the behavior of MongoDB's Decimal128 implementation str(Decimal(value)) may not match str(Decimal128(value)) for NaN values::

>>> Decimal128(Decimal('NaN'))
Decimal128('NaN')
>>> Decimal128(Decimal('-NaN'))
Decimal128('NaN')
>>> Decimal128(Decimal('sNaN'))
Decimal128('NaN')
>>> Decimal128(Decimal('-sNaN'))
Decimal128('NaN')

However, ~Decimal128.to_decimal() will return the exact value::

>>> Decimal128(Decimal('NaN')).to_decimal()
Decimal('NaN')
>>> Decimal128(Decimal('-NaN')).to_decimal()
Decimal('-NaN')
>>> Decimal128(Decimal('sNaN')).to_decimal()
Decimal('sNaN')
>>> Decimal128(Decimal('-sNaN')).to_decimal()
Decimal('-sNaN')

Two instances of Decimal128 compare equal if their Binary Integer Decimal encodings are equal::

>>> Decimal128('NaN') == Decimal128('NaN')
True
>>> Decimal128('NaN').bid == Decimal128('NaN').bid
True

This differs from decimal.Decimal comparisons for NaN::

>>> Decimal('NaN') == Decimal('NaN')
False
Decimal128( value: Union[decimal.Decimal, float, str, Tuple[int, Sequence[int], int]])
def __init__(self, value: _VALUE_OPTIONS) -> None:
    """Store `value` as a (high, low) pair of words.

    Strings and :class:`decimal.Decimal` values are converted with
    ``_decimal_to_128``; a two-element list or tuple is taken as-is.
    Raises :class:`ValueError` for a sequence of any other length and
    :class:`TypeError` for any other type.
    """
    if isinstance(value, (str, decimal.Decimal)):
        self.__high, self.__low = _decimal_to_128(value)
        return

    if not isinstance(value, (list, tuple)):
        raise TypeError("Cannot convert %r to Decimal128" % (value,))

    if len(value) != 2:
        raise ValueError(
            "Invalid size for creation of Decimal128 "
            "from list or tuple. Must have exactly 2 "
            "elements."
        )
    self.__high, self.__low = value  # type: ignore
def to_decimal(self) -> decimal.Decimal:
def to_decimal(self) -> decimal.Decimal:
    """Convert this :class:`Decimal128` to a :class:`decimal.Decimal`."""
    hi_word = self.__high
    lo_word = self.__low
    negative = 1 if (hi_word & _SIGN) else 0

    # Special values carry no digits: signaling NaN, quiet NaN, infinity.
    for flag, code in ((_SNAN, "N"), (_NAN, "n"), (_INF, "F")):
        if (hi_word & flag) == flag:
            return decimal.Decimal((negative, (), code))  # type: ignore

    if (hi_word & _EXPONENT_MASK) == _EXPONENT_MASK:
        # Alternate exponent position; the result uses a zero coefficient.
        exponent = ((hi_word & 0x1FFFE00000000000) >> 47) - _EXPONENT_BIAS
        return decimal.Decimal((negative, (0,), exponent))

    exponent = ((hi_word & 0x7FFF800000000000) >> 49) - _EXPONENT_BIAS

    # Coefficient: the low 49 bits of the high word followed by the
    # 64 bits of the low word.
    coefficient = ((hi_word & 0x0001FFFFFFFFFFFF) << 64) | (lo_word & 0xFFFFFFFFFFFFFFFF)

    # cdecimal only accepts a tuple for digits.
    digits = tuple(int(ch) for ch in str(coefficient))

    with decimal.localcontext(_DEC128_CTX) as ctx:
        return ctx.create_decimal((negative, digits, exponent))

Returns an instance of decimal.Decimal for this Decimal128.

@classmethod
def from_bid( cls: Type[xtquant.xtbson.bson37.Decimal128], value: bytes) -> xtquant.xtbson.bson37.Decimal128:
@classmethod
def from_bid(cls: Type["Decimal128"], value: bytes) -> "Decimal128":
    """Build a :class:`Decimal128` from its Binary Integer Decimal bytes.

    Raises :class:`TypeError` if `value` is not :class:`bytes` and
    :class:`ValueError` if it is not exactly 16 bytes long.

    :Parameters:
      - `value`: 16 byte string (128-bit IEEE 754-2008 decimal floating
        point in Binary Integer Decimal (BID) format).
    """
    if not isinstance(value, bytes):
        raise TypeError("value must be an instance of bytes")
    if len(value) != 16:
        raise ValueError("value must be exactly 16 bytes")
    # The second half of the encoding is the high word, the first the low.
    high_word = _UNPACK_64(value[8:])[0]
    low_word = _UNPACK_64(value[:8])[0]
    return cls((high_word, low_word))  # type: ignore

Create an instance of Decimal128 from Binary Integer Decimal string.

:Parameters:

  • value: 16 byte string (128-bit IEEE 754-2008 decimal floating point in Binary Integer Decimal (BID) format).
bid: bytes

The Binary Integer Decimal (BID) encoding of this instance.

class InvalidBSON(xtquant.xtbson.bson37.errors.BSONError):
class InvalidBSON(BSONError):
    """Error raised when a BSON object cannot be created from invalid data."""

Raised when trying to create a BSON object from invalid data.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class InvalidDocument(xtquant.xtbson.bson37.errors.BSONError):
class InvalidDocument(BSONError):
    """Error raised when a BSON object cannot be created from an invalid document."""

Raised when trying to create a BSON object from an invalid document.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class InvalidStringData(xtquant.xtbson.bson37.errors.BSONError):
class InvalidStringData(BSONError):
    """Error raised when a string to be encoded contains non-UTF8 data."""

Raised when trying to encode a string containing non-UTF8 data.

Inherited Members
builtins.Exception
Exception
builtins.BaseException
with_traceback
args
class Int64(builtins.int):
class Int64(int):
    """An :class:`int` that is always encoded as a BSON int64.

    Python 3 has a single integer type, and small integers are encoded to
    BSON int32 by default. Wrapping a number in Int64 forces the int64
    encoding regardless of its size.

    :Parameters:
      - `value`: the numeric value to represent
    """

    # No per-instance attributes beyond the integer value itself.
    __slots__ = ()

    _type_marker = 18

    def __getstate__(self) -> Any:
        """Return an empty state; there is nothing besides the value."""
        return dict()

    def __setstate__(self, state: Any) -> None:
        """Ignore `state`; there is nothing to restore."""
        return None

Representation of the BSON int64 type.

This is necessary because every integral number is an int in Python 3. Small integral numbers are encoded to BSON int32 by default, but Int64 numbers will always be encoded to BSON int64.

:Parameters:

  • value: the numeric value to represent
Inherited Members
builtins.int
conjugate
bit_length
bit_count
to_bytes
from_bytes
as_integer_ratio
real
imag
numerator
denominator
class MaxKey:
class MaxKey:
    """MongoDB internal MaxKey type.

    All instances are equal to each other and compare greater than any
    object that is not a MaxKey.
    """

    __slots__ = ()

    _type_marker = 127

    def __getstate__(self) -> Any:
        """Return an empty state; instances carry no data."""
        return dict()

    def __setstate__(self, state: Any) -> None:
        """Ignore `state`; there is nothing to restore."""
        return None

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, MaxKey)

    def __hash__(self) -> int:
        # Every instance shares one hash, matching the equality rule.
        return hash(self._type_marker)

    def __ne__(self, other: Any) -> bool:
        return not (self == other)

    def __le__(self, other: Any) -> bool:
        # Only another MaxKey is at least as large.
        return isinstance(other, MaxKey)

    def __lt__(self, dummy: Any) -> bool:
        return False

    def __ge__(self, dummy: Any) -> bool:
        return True

    def __gt__(self, other: Any) -> bool:
        if isinstance(other, MaxKey):
            return False
        return True

    def __repr__(self):
        return "MaxKey()"

MongoDB internal MaxKey type.

class MinKey:
class MinKey:
    """MongoDB internal MinKey type.

    All instances are equal to each other and compare less than any
    object that is not a MinKey.
    """

    __slots__ = ()

    _type_marker = 255

    def __getstate__(self) -> Any:
        """Return an empty state; instances carry no data."""
        return dict()

    def __setstate__(self, state: Any) -> None:
        """Ignore `state`; there is nothing to restore."""
        return None

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, MinKey)

    def __hash__(self) -> int:
        # Every instance shares one hash, matching the equality rule.
        return hash(self._type_marker)

    def __ne__(self, other: Any) -> bool:
        return not (self == other)

    def __le__(self, dummy: Any) -> bool:
        return True

    def __lt__(self, other: Any) -> bool:
        if isinstance(other, MinKey):
            return False
        return True

    def __ge__(self, other: Any) -> bool:
        # Only another MinKey is at least as small.
        return isinstance(other, MinKey)

    def __gt__(self, dummy: Any) -> bool:
        return False

    def __repr__(self):
        return "MinKey()"

MongoDB internal MinKey type.

class ObjectId:
 47class ObjectId(object):
 48    """A MongoDB ObjectId."""
 49
 50    _pid = os.getpid()
 51
 52    _inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE)
 53    _inc_lock = threading.Lock()
 54
 55    __random = _random_bytes()
 56
 57    __slots__ = ("__id",)
 58
 59    _type_marker = 7
 60
 61    def __init__(self, oid: Optional[Union[str, "ObjectId", bytes]] = None) -> None:
 62        """Initialize a new ObjectId.
 63
 64        An ObjectId is a 12-byte unique identifier consisting of:
 65
 66          - a 4-byte value representing the seconds since the Unix epoch,
 67          - a 5-byte random value,
 68          - a 3-byte counter, starting with a random value.
 69
 70        By default, ``ObjectId()`` creates a new unique identifier. The
 71        optional parameter `oid` can be an :class:`ObjectId`, or any 12
 72        :class:`bytes`.
 73
 74        For example, the 12 bytes b'foo-bar-quux' do not follow the ObjectId
 75        specification but they are acceptable input::
 76
 77          >>> ObjectId(b'foo-bar-quux')
 78          ObjectId('666f6f2d6261722d71757578')
 79
 80        `oid` can also be a :class:`str` of 24 hex digits::
 81
 82          >>> ObjectId('0123456789ab0123456789ab')
 83          ObjectId('0123456789ab0123456789ab')
 84
 85        Raises :class:`~bson.errors.InvalidId` if `oid` is not 12 bytes nor
 86        24 hex digits, or :class:`TypeError` if `oid` is not an accepted type.
 87
 88        :Parameters:
 89          - `oid` (optional): a valid ObjectId.
 90
 91        .. seealso:: The MongoDB documentation on  `ObjectIds <http://dochub.mongodb.org/core/objectids>`_.
 92
 93        .. versionchanged:: 3.8
 94           :class:`~bson.objectid.ObjectId` now implements the `ObjectID
 95           specification version 0.2
 96           <https://github.com/mongodb/specifications/blob/master/source/
 97           objectid.rst>`_.
 98        """
 99        if oid is None:
100            self.__generate()
101        elif isinstance(oid, bytes) and len(oid) == 12:
102            self.__id = oid
103        else:
104            self.__validate(oid)
105
106    @classmethod
107    def from_datetime(cls: Type["ObjectId"], generation_time: datetime.datetime) -> "ObjectId":
108        """Create a dummy ObjectId instance with a specific generation time.
109
110        This method is useful for doing range queries on a field
111        containing :class:`ObjectId` instances.
112
113        .. warning::
114           It is not safe to insert a document containing an ObjectId
115           generated using this method. This method deliberately
116           eliminates the uniqueness guarantee that ObjectIds
117           generally provide. ObjectIds generated with this method
118           should be used exclusively in queries.
119
120        `generation_time` will be converted to UTC. Naive datetime
121        instances will be treated as though they already contain UTC.
122
123        An example using this helper to get documents where ``"_id"``
124        was generated before January 1, 2010 would be:
125
126        >>> gen_time = datetime.datetime(2010, 1, 1)
127        >>> dummy_id = ObjectId.from_datetime(gen_time)
128        >>> result = collection.find({"_id": {"$lt": dummy_id}})
129
130        :Parameters:
131          - `generation_time`: :class:`~datetime.datetime` to be used
132            as the generation time for the resulting ObjectId.
133        """
134        offset = generation_time.utcoffset()
135        if offset is not None:
136            generation_time = generation_time - offset
137        timestamp = calendar.timegm(generation_time.timetuple())
138        oid = struct.pack(">I", int(timestamp)) + b"\x00\x00\x00\x00\x00\x00\x00\x00"
139        return cls(oid)
140
141    @classmethod
142    def is_valid(cls: Type["ObjectId"], oid: Any) -> bool:
143        """Checks if a `oid` string is valid or not.
144
145        :Parameters:
146          - `oid`: the object id to validate
147
148        .. versionadded:: 2.3
149        """
150        if not oid:
151            return False
152
153        try:
154            ObjectId(oid)
155            return True
156        except (InvalidId, TypeError):
157            return False
158
159    @classmethod
160    def _random(cls) -> bytes:
161        """Generate a 5-byte random number once per process."""
162        pid = os.getpid()
163        if pid != cls._pid:
164            cls._pid = pid
165            cls.__random = _random_bytes()
166        return cls.__random
167
168    def __generate(self) -> None:
169        """Generate a new value for this ObjectId."""
170
171        # 4 bytes current time
172        oid = struct.pack(">I", int(time.time()))
173
174        # 5 bytes random
175        oid += ObjectId._random()
176
177        # 3 bytes inc
178        with ObjectId._inc_lock:
179            oid += struct.pack(">I", ObjectId._inc)[1:4]
180            ObjectId._inc = (ObjectId._inc + 1) % (_MAX_COUNTER_VALUE + 1)
181
182        self.__id = oid
183
184    def __validate(self, oid: Any) -> None:
185        """Validate and use the given id for this ObjectId.
186
187        Raises TypeError if id is not an instance of
188        (:class:`basestring` (:class:`str` or :class:`bytes`
189        in python 3), ObjectId) and InvalidId if it is not a
190        valid ObjectId.
191
192        :Parameters:
193          - `oid`: a valid ObjectId
194        """
195        if isinstance(oid, ObjectId):
196            self.__id = oid.binary
197        elif isinstance(oid, str):
198            if len(oid) == 24:
199                try:
200                    self.__id = bytes.fromhex(oid)
201                except (TypeError, ValueError):
202                    _raise_invalid_id(oid)
203            else:
204                _raise_invalid_id(oid)
205        else:
206            raise TypeError(
207                "id must be an instance of (bytes, str, ObjectId), not %s" % (type(oid),)
208            )
209
210    @property
211    def binary(self) -> bytes:
212        """12-byte binary representation of this ObjectId."""
213        return self.__id
214
215    @property
216    def generation_time(self) -> datetime.datetime:
217        """A :class:`datetime.datetime` instance representing the time of
218        generation for this :class:`ObjectId`.
219
220        The :class:`datetime.datetime` is timezone aware, and
221        represents the generation time in UTC. It is precise to the
222        second.
223        """
224        timestamp = struct.unpack(">I", self.__id[0:4])[0]
225        return datetime.datetime.fromtimestamp(timestamp, utc)
226
227    def __getstate__(self) -> bytes:
228        """return value of object for pickling.
229        needed explicitly because __slots__() defined.
230        """
231        return self.__id
232
233    def __setstate__(self, value: Any) -> None:
234        """explicit state set from pickling"""
235        # Provide backwards compatability with OIDs
236        # pickled with pymongo-1.9 or older.
237        if isinstance(value, dict):
238            oid = value["_ObjectId__id"]
239        else:
240            oid = value
241        # ObjectIds pickled in python 2.x used `str` for __id.
242        # In python 3.x this has to be converted to `bytes`
243        # by encoding latin-1.
244        if isinstance(oid, str):
245            self.__id = oid.encode("latin-1")
246        else:
247            self.__id = oid
248
249    def __str__(self) -> str:
250        return binascii.hexlify(self.__id).decode()
251
    def __repr__(self) -> str:
        """Return ``ObjectId('<hex>')``, where ``<hex>`` is ``str(self)``."""
        return "ObjectId('%s')" % (str(self),)
254
255    def __eq__(self, other: Any) -> bool:
256        if isinstance(other, ObjectId):
257            return self.__id == other.binary
258        return NotImplemented
259
260    def __ne__(self, other: Any) -> bool:
261        if isinstance(other, ObjectId):
262            return self.__id != other.binary
263        return NotImplemented
264
265    def __lt__(self, other: Any) -> bool:
266        if isinstance(other, ObjectId):
267            return self.__id < other.binary
268        return NotImplemented
269
270    def __le__(self, other: Any) -> bool:
271        if isinstance(other, ObjectId):
272            return self.__id <= other.binary
273        return NotImplemented
274
275    def __gt__(self, other: Any) -> bool:
276        if isinstance(other, ObjectId):
277            return self.__id > other.binary
278        return NotImplemented
279
280    def __ge__(self, other: Any) -> bool:
281        if isinstance(other, ObjectId):
282            return self.__id >= other.binary
283        return NotImplemented
284
    def __hash__(self) -> int:
        """Get a hash value for this :class:`ObjectId`.

        The hash is that of the stored id value, so instances that
        compare equal hash equally.
        """
        return hash(self.__id)

A MongoDB ObjectId.

ObjectId( oid: Union[str, xtquant.xtbson.bson37.ObjectId, bytes, NoneType] = None)
 61    def __init__(self, oid: Optional[Union[str, "ObjectId", bytes]] = None) -> None:
 62        """Initialize a new ObjectId.
 63
 64        An ObjectId is a 12-byte unique identifier consisting of:
 65
 66          - a 4-byte value representing the seconds since the Unix epoch,
 67          - a 5-byte random value,
 68          - a 3-byte counter, starting with a random value.
 69
 70        By default, ``ObjectId()`` creates a new unique identifier. The
 71        optional parameter `oid` can be an :class:`ObjectId`, or any 12
 72        :class:`bytes`.
 73
 74        For example, the 12 bytes b'foo-bar-quux' do not follow the ObjectId
 75        specification but they are acceptable input::
 76
 77          >>> ObjectId(b'foo-bar-quux')
 78          ObjectId('666f6f2d6261722d71757578')
 79
 80        `oid` can also be a :class:`str` of 24 hex digits::
 81
 82          >>> ObjectId('0123456789ab0123456789ab')
 83          ObjectId('0123456789ab0123456789ab')
 84
 85        Raises :class:`~bson.errors.InvalidId` if `oid` is not 12 bytes nor
 86        24 hex digits, or :class:`TypeError` if `oid` is not an accepted type.
 87
 88        :Parameters:
 89          - `oid` (optional): a valid ObjectId.
 90
 91        .. seealso:: The MongoDB documentation on  `ObjectIds <http://dochub.mongodb.org/core/objectids>`_.
 92
 93        .. versionchanged:: 3.8
 94           :class:`~bson.objectid.ObjectId` now implements the `ObjectID
 95           specification version 0.2
 96           <https://github.com/mongodb/specifications/blob/master/source/
 97           objectid.rst>`_.
 98        """
 99        if oid is None:
100            self.__generate()
101        elif isinstance(oid, bytes) and len(oid) == 12:
102            self.__id = oid
103        else:
104            self.__validate(oid)

Initialize a new ObjectId.

An ObjectId is a 12-byte unique identifier consisting of:

  • a 4-byte value representing the seconds since the Unix epoch,
  • a 5-byte random value,
  • a 3-byte counter, starting with a random value.

By default, ObjectId() creates a new unique identifier. The optional parameter oid can be an ObjectId, or any 12 bytes.

For example, the 12 bytes b'foo-bar-quux' do not follow the ObjectId specification but they are acceptable input::

>>> ObjectId(b'foo-bar-quux')
ObjectId('666f6f2d6261722d71757578')

oid can also be a str of 24 hex digits::

>>> ObjectId('0123456789ab0123456789ab')
ObjectId('0123456789ab0123456789ab')

Raises ~bson.errors.InvalidId if oid is not 12 bytes nor 24 hex digits, or TypeError if oid is not an accepted type.

:Parameters:

  • oid (optional): a valid ObjectId.

See also: the MongoDB documentation on ObjectIds <http://dochub.mongodb.org/core/objectids>.

Changed in version 3.8: ~bson.objectid.ObjectId now implements the ObjectID specification version 0.2 <https://github.com/mongodb/specifications/blob/master/source/objectid.rst>.

@classmethod
def from_datetime( cls: Type[xtquant.xtbson.bson37.ObjectId], generation_time: datetime.datetime) -> xtquant.xtbson.bson37.ObjectId:
106    @classmethod
107    def from_datetime(cls: Type["ObjectId"], generation_time: datetime.datetime) -> "ObjectId":
108        """Create a dummy ObjectId instance with a specific generation time.
109
110        This method is useful for doing range queries on a field
111        containing :class:`ObjectId` instances.
112
113        .. warning::
114           It is not safe to insert a document containing an ObjectId
115           generated using this method. This method deliberately
116           eliminates the uniqueness guarantee that ObjectIds
117           generally provide. ObjectIds generated with this method
118           should be used exclusively in queries.
119
120        `generation_time` will be converted to UTC. Naive datetime
121        instances will be treated as though they already contain UTC.
122
123        An example using this helper to get documents where ``"_id"``
124        was generated before January 1, 2010 would be:
125
126        >>> gen_time = datetime.datetime(2010, 1, 1)
127        >>> dummy_id = ObjectId.from_datetime(gen_time)
128        >>> result = collection.find({"_id": {"$lt": dummy_id}})
129
130        :Parameters:
131          - `generation_time`: :class:`~datetime.datetime` to be used
132            as the generation time for the resulting ObjectId.
133        """
134        offset = generation_time.utcoffset()
135        if offset is not None:
136            generation_time = generation_time - offset
137        timestamp = calendar.timegm(generation_time.timetuple())
138        oid = struct.pack(">I", int(timestamp)) + b"\x00\x00\x00\x00\x00\x00\x00\x00"
139        return cls(oid)

Create a dummy ObjectId instance with a specific generation time.

This method is useful for doing range queries on a field containing ObjectId instances.

It is not safe to insert a document containing an ObjectId generated using this method. This method deliberately eliminates the uniqueness guarantee that ObjectIds generally provide. ObjectIds generated with this method should be used exclusively in queries.

generation_time will be converted to UTC. Naive datetime instances will be treated as though they already contain UTC.

An example using this helper to get documents where "_id" was generated before January 1, 2010 would be:

>>> gen_time = datetime.datetime(2010, 1, 1)
>>> dummy_id = ObjectId.from_datetime(gen_time)
>>> result = collection.find({"_id": {"$lt": dummy_id}})

:Parameters:

  • generation_time: ~datetime.datetime to be used as the generation time for the resulting ObjectId.
@classmethod
def is_valid(cls: Type[xtquant.xtbson.bson37.ObjectId], oid: Any) -> bool:
141    @classmethod
142    def is_valid(cls: Type["ObjectId"], oid: Any) -> bool:
143        """Checks if a `oid` string is valid or not.
144
145        :Parameters:
146          - `oid`: the object id to validate
147
148        .. versionadded:: 2.3
149        """
150        if not oid:
151            return False
152
153        try:
154            ObjectId(oid)
155            return True
156        except (InvalidId, TypeError):
157            return False

Checks whether an oid string is valid.

:Parameters:

  • oid: the object id to validate

New in version 2.3.

binary: bytes

12-byte binary representation of this ObjectId.

generation_time: datetime.datetime

A datetime.datetime instance representing the time of generation for this ObjectId.

The datetime.datetime is timezone aware, and represents the generation time in UTC. It is precise to the second.

class Regex(typing.Generic[~_T]):
 47class Regex(Generic[_T]):
 48    """BSON regular expression data."""
 49
 50    __slots__ = ("pattern", "flags")
 51
 52    __getstate__ = _getstate_slots
 53    __setstate__ = _setstate_slots
 54
 55    _type_marker = 11
 56
 57    @classmethod
 58    def from_native(cls: Type["Regex"], regex: "Pattern[_T]") -> "Regex[_T]":
 59        """Convert a Python regular expression into a ``Regex`` instance.
 60
 61        Note that in Python 3, a regular expression compiled from a
 62        :class:`str` has the ``re.UNICODE`` flag set. If it is undesirable
 63        to store this flag in a BSON regular expression, unset it first::
 64
 65          >>> pattern = re.compile('.*')
 66          >>> regex = Regex.from_native(pattern)
 67          >>> regex.flags ^= re.UNICODE
 68          >>> db.collection.insert_one({'pattern': regex})
 69
 70        :Parameters:
 71          - `regex`: A regular expression object from ``re.compile()``.
 72
 73        .. warning::
 74           Python regular expressions use a different syntax and different
 75           set of flags than MongoDB, which uses `PCRE`_. A regular
 76           expression retrieved from the server may not compile in
 77           Python, or may match a different set of strings in Python than
 78           when used in a MongoDB query.
 79
 80        .. _PCRE: http://www.pcre.org/
 81        """
 82        if not isinstance(regex, RE_TYPE):
 83            raise TypeError("regex must be a compiled regular expression, not %s" % type(regex))
 84
 85        return Regex(regex.pattern, regex.flags)
 86
 87    def __init__(self, pattern: _T, flags: Union[str, int] = 0) -> None:
 88        """BSON regular expression data.
 89
 90        This class is useful to store and retrieve regular expressions that are
 91        incompatible with Python's regular expression dialect.
 92
 93        :Parameters:
 94          - `pattern`: string
 95          - `flags`: (optional) an integer bitmask, or a string of flag
 96            characters like "im" for IGNORECASE and MULTILINE
 97        """
 98        if not isinstance(pattern, (str, bytes)):
 99            raise TypeError("pattern must be a string, not %s" % type(pattern))
100        self.pattern: _T = pattern
101
102        if isinstance(flags, str):
103            self.flags = str_flags_to_int(flags)
104        elif isinstance(flags, int):
105            self.flags = flags
106        else:
107            raise TypeError("flags must be a string or int, not %s" % type(flags))
108
109    def __eq__(self, other: Any) -> bool:
110        if isinstance(other, Regex):
111            return self.pattern == other.pattern and self.flags == other.flags
112        else:
113            return NotImplemented
114
115    __hash__ = None  # type: ignore
116
117    def __ne__(self, other: Any) -> bool:
118        return not self == other
119
120    def __repr__(self):
121        return "Regex(%r, %r)" % (self.pattern, self.flags)
122
123    def try_compile(self) -> "Pattern[_T]":
124        """Compile this :class:`Regex` as a Python regular expression.
125
126        .. warning::
127           Python regular expressions use a different syntax and different
128           set of flags than MongoDB, which uses `PCRE`_. A regular
129           expression retrieved from the server may not compile in
130           Python, or may match a different set of strings in Python than
131           when used in a MongoDB query. :meth:`try_compile()` may raise
132           :exc:`re.error`.
133
134        .. _PCRE: http://www.pcre.org/
135        """
136        return re.compile(self.pattern, self.flags)

BSON regular expression data.

Regex(pattern: ~_T, flags: Union[str, int] = 0)
 87    def __init__(self, pattern: _T, flags: Union[str, int] = 0) -> None:
 88        """BSON regular expression data.
 89
 90        This class is useful to store and retrieve regular expressions that are
 91        incompatible with Python's regular expression dialect.
 92
 93        :Parameters:
 94          - `pattern`: string
 95          - `flags`: (optional) an integer bitmask, or a string of flag
 96            characters like "im" for IGNORECASE and MULTILINE
 97        """
 98        if not isinstance(pattern, (str, bytes)):
 99            raise TypeError("pattern must be a string, not %s" % type(pattern))
100        self.pattern: _T = pattern
101
102        if isinstance(flags, str):
103            self.flags = str_flags_to_int(flags)
104        elif isinstance(flags, int):
105            self.flags = flags
106        else:
107            raise TypeError("flags must be a string or int, not %s" % type(flags))

BSON regular expression data.

This class is useful to store and retrieve regular expressions that are incompatible with Python's regular expression dialect.

:Parameters:

  • pattern: string
  • flags: (optional) an integer bitmask, or a string of flag characters like "im" for IGNORECASE and MULTILINE
@classmethod
def from_native( cls: Type[xtquant.xtbson.bson37.Regex], regex: Pattern[~_T]) -> xtquant.xtbson.bson37.Regex[~_T]:
57    @classmethod
58    def from_native(cls: Type["Regex"], regex: "Pattern[_T]") -> "Regex[_T]":
59        """Convert a Python regular expression into a ``Regex`` instance.
60
61        Note that in Python 3, a regular expression compiled from a
62        :class:`str` has the ``re.UNICODE`` flag set. If it is undesirable
63        to store this flag in a BSON regular expression, unset it first::
64
65          >>> pattern = re.compile('.*')
66          >>> regex = Regex.from_native(pattern)
67          >>> regex.flags ^= re.UNICODE
68          >>> db.collection.insert_one({'pattern': regex})
69
70        :Parameters:
71          - `regex`: A regular expression object from ``re.compile()``.
72
73        .. warning::
74           Python regular expressions use a different syntax and different
75           set of flags than MongoDB, which uses `PCRE`_. A regular
76           expression retrieved from the server may not compile in
77           Python, or may match a different set of strings in Python than
78           when used in a MongoDB query.
79
80        .. _PCRE: http://www.pcre.org/
81        """
82        if not isinstance(regex, RE_TYPE):
83            raise TypeError("regex must be a compiled regular expression, not %s" % type(regex))
84
85        return Regex(regex.pattern, regex.flags)

Convert a Python regular expression into a Regex instance.

Note that in Python 3, a regular expression compiled from a str has the re.UNICODE flag set. If it is undesirable to store this flag in a BSON regular expression, unset it first::

>>> pattern = re.compile('.*')
>>> regex = Regex.from_native(pattern)
>>> regex.flags ^= re.UNICODE
>>> db.collection.insert_one({'pattern': regex})

:Parameters:

  • regex: A regular expression object from re.compile().

Python regular expressions use a different syntax and different set of flags than MongoDB, which uses PCRE. A regular expression retrieved from the server may not compile in Python, or may match a different set of strings in Python than when used in a MongoDB query.

pattern: ~_T
def try_compile(self) -> Pattern[~_T]:
    def try_compile(self) -> "Pattern[_T]":
        """Compile this :class:`Regex` as a Python regular expression.

        The stored pattern and flags are passed to :func:`re.compile`.

        .. warning::
           Python regular expressions use a different syntax and different
           set of flags than MongoDB, which uses `PCRE`_. A regular
           expression retrieved from the server may not compile in
           Python, or may match a different set of strings in Python than
           when used in a MongoDB query. :meth:`try_compile` may raise
           :exc:`re.error`.

        .. _PCRE: http://www.pcre.org/
        """
        return re.compile(self.pattern, self.flags)

Compile this Regex as a Python regular expression.

Python regular expressions use a different syntax and different set of flags than MongoDB, which uses PCRE. A regular expression retrieved from the server may not compile in Python, or may match a different set of strings in Python than when used in a MongoDB query. try_compile() may raise re.error.

flags
RE_TYPE = <class 're.Pattern'>
class SON(typing.Dict[~_Key, ~_Value]):
 49class SON(Dict[_Key, _Value]):
 50    """SON data.
 51
 52    A subclass of dict that maintains ordering of keys and provides a
 53    few extra niceties for dealing with SON. SON provides an API
 54    similar to collections.OrderedDict.
 55    """
 56
 57    __keys: List[Any]
 58
 59    def __init__(
 60        self,
 61        data: Optional[Union[Mapping[_Key, _Value], Iterable[Tuple[_Key, _Value]]]] = None,
 62        **kwargs: Any
 63    ) -> None:
 64        self.__keys = []
 65        dict.__init__(self)
 66        self.update(data)
 67        self.update(kwargs)
 68
 69    def __new__(cls: Type["SON[_Key, _Value]"], *args: Any, **kwargs: Any) -> "SON[_Key, _Value]":
 70        instance = super(SON, cls).__new__(cls, *args, **kwargs)
 71        instance.__keys = []
 72        return instance
 73
 74    def __repr__(self):
 75        result = []
 76        for key in self.__keys:
 77            result.append("(%r, %r)" % (key, self[key]))
 78        return "SON([%s])" % ", ".join(result)
 79
 80    def __setitem__(self, key: _Key, value: _Value) -> None:
 81        if key not in self.__keys:
 82            self.__keys.append(key)
 83        dict.__setitem__(self, key, value)
 84
 85    def __delitem__(self, key: _Key) -> None:
 86        self.__keys.remove(key)
 87        dict.__delitem__(self, key)
 88
 89    def copy(self) -> "SON[_Key, _Value]":
 90        other: SON[_Key, _Value] = SON()
 91        other.update(self)
 92        return other
 93
 94    # TODO this is all from UserDict.DictMixin. it could probably be made more
 95    # efficient.
 96    # second level definitions support higher levels
 97    def __iter__(self) -> Iterator[_Key]:
 98        for k in self.__keys:
 99            yield k
100
101    def has_key(self, key: _Key) -> bool:
102        return key in self.__keys
103
104    def iterkeys(self) -> Iterator[_Key]:
105        return self.__iter__()
106
107    # fourth level uses definitions from lower levels
108    def itervalues(self) -> Iterator[_Value]:
109        for _, v in self.items():
110            yield v
111
112    def values(self) -> List[_Value]:  # type: ignore[override]
113        return [v for _, v in self.items()]
114
115    def clear(self) -> None:
116        self.__keys = []
117        super(SON, self).clear()
118
119    def setdefault(self, key: _Key, default: _Value) -> _Value:  # type: ignore[override]
120        try:
121            return self[key]
122        except KeyError:
123            self[key] = default
124        return default
125
126    def pop(self, key: _Key, *args: Union[_Value, _T]) -> Union[_Value, _T]:
127        if len(args) > 1:
128            raise TypeError("pop expected at most 2 arguments, got " + repr(1 + len(args)))
129        try:
130            value = self[key]
131        except KeyError:
132            if args:
133                return args[0]
134            raise
135        del self[key]
136        return value
137
138    def popitem(self) -> Tuple[_Key, _Value]:
139        try:
140            k, v = next(iter(self.items()))
141        except StopIteration:
142            raise KeyError("container is empty")
143        del self[k]
144        return (k, v)
145
146    def update(self, other: Optional[Any] = None, **kwargs: _Value) -> None:  # type: ignore[override]
147        # Make progressively weaker assumptions about "other"
148        if other is None:
149            pass
150        elif hasattr(other, "items"):
151            for k, v in other.items():
152                self[k] = v
153        elif hasattr(other, "keys"):
154            for k in other.keys():
155                self[k] = other[k]
156        else:
157            for k, v in other:
158                self[k] = v
159        if kwargs:
160            self.update(kwargs)
161
162    def get(self, key: _Key, default: Optional[Union[_Value, _T]] = None) -> Union[_Value, _T, None]:  # type: ignore[override]
163        try:
164            return self[key]
165        except KeyError:
166            return default
167
168    def __eq__(self, other: Any) -> bool:
169        """Comparison to another SON is order-sensitive while comparison to a
170        regular dictionary is order-insensitive.
171        """
172        if isinstance(other, SON):
173            return len(self) == len(other) and list(self.items()) == list(other.items())
174        return self.to_dict() == other
175
176    def __ne__(self, other: Any) -> bool:
177        return not self == other
178
179    def __len__(self) -> int:
180        return len(self.__keys)
181
182    def to_dict(self) -> Dict[_Key, _Value]:
183        """Convert a SON document to a normal Python dictionary instance.
184
185        This is trickier than just *dict(...)* because it needs to be
186        recursive.
187        """
188
189        def transform_value(value: Any) -> Any:
190            if isinstance(value, list):
191                return [transform_value(v) for v in value]
192            elif isinstance(value, _Mapping):
193                return dict([(k, transform_value(v)) for k, v in value.items()])
194            else:
195                return value
196
197        return transform_value(dict(self))
198
199    def __deepcopy__(self, memo: Dict[int, "SON[_Key, _Value]"]) -> "SON[_Key, _Value]":
200        out: SON[_Key, _Value] = SON()
201        val_id = id(self)
202        if val_id in memo:
203            return memo[val_id]
204        memo[val_id] = out
205        for k, v in self.items():
206            if not isinstance(v, RE_TYPE):
207                v = copy.deepcopy(v, memo)
208            out[k] = v
209        return out

SON data.

A subclass of dict that maintains ordering of keys and provides a few extra niceties for dealing with SON. SON provides an API similar to collections.OrderedDict.

def copy(self) -> xtquant.xtbson.bson37.SON[~_Key, ~_Value]:
89    def copy(self) -> "SON[_Key, _Value]":
90        other: SON[_Key, _Value] = SON()
91        other.update(self)
92        return other

D.copy() -> a shallow copy of D

def has_key(self, key: ~_Key) -> bool:
    def has_key(self, key: _Key) -> bool:
        """Return whether `key` is one of the tracked keys."""
        return key in self.__keys
def iterkeys(self) -> Iterator[~_Key]:
    def iterkeys(self) -> Iterator[_Key]:
        """Return an iterator over the keys; equivalent to ``__iter__``."""
        return self.__iter__()
def itervalues(self) -> Iterator[~_Value]:
    def itervalues(self) -> Iterator[_Value]:
        """Yield each value, in the order produced by ``items()``."""
        for _, v in self.items():
            yield v
def values(self) -> List[~_Value]:
    def values(self) -> List[_Value]:  # type: ignore[override]
        """Return a list (not a view) of the values, in ``items()`` order."""
        return [v for _, v in self.items()]

D.values() -> a new list of D's values. SON overrides dict.values(), so the result is a list rather than a view object.

def clear(self) -> None:
    def clear(self) -> None:
        """Remove all items, resetting both the key list and the dict."""
        self.__keys = []
        super(SON, self).clear()

D.clear() -> None. Remove all items from D.

def setdefault(self, key: ~_Key, default: ~_Value) -> ~_Value:
119    def setdefault(self, key: _Key, default: _Value) -> _Value:  # type: ignore[override]
120        try:
121            return self[key]
122        except KeyError:
123            self[key] = default
124        return default

Insert key with a value of default if key is not in the dictionary.

Return the value for key if key is in the dictionary, else default.

def pop(self, key: ~_Key, *args: Union[~_Value, ~_T]) -> Union[~_Value, ~_T]:
126    def pop(self, key: _Key, *args: Union[_Value, _T]) -> Union[_Value, _T]:
127        if len(args) > 1:
128            raise TypeError("pop expected at most 2 arguments, got " + repr(1 + len(args)))
129        try:
130            value = self[key]
131        except KeyError:
132            if args:
133                return args[0]
134            raise
135        del self[key]
136        return value

D.pop(k[,d]) -> v, remove specified key and return the corresponding value.

If the key is not found, return the default if given; otherwise, raise a KeyError.

def popitem(self) -> Tuple[~_Key, ~_Value]:
138    def popitem(self) -> Tuple[_Key, _Value]:
139        try:
140            k, v = next(iter(self.items()))
141        except StopIteration:
142            raise KeyError("container is empty")
143        del self[k]
144        return (k, v)

Remove and return a (key, value) pair as a 2-tuple.

SON overrides dict.popitem(): it removes the first pair yielded by items() rather than the last-inserted one. Raises KeyError if the container is empty.

def update(self, other: Optional[Any] = None, **kwargs: ~_Value) -> None:
146    def update(self, other: Optional[Any] = None, **kwargs: _Value) -> None:  # type: ignore[override]
147        # Make progressively weaker assumptions about "other"
148        if other is None:
149            pass
150        elif hasattr(other, "items"):
151            for k, v in other.items():
152                self[k] = v
153        elif hasattr(other, "keys"):
154            for k in other.keys():
155                self[k] = other[k]
156        else:
157            for k, v in other:
158                self[k] = v
159        if kwargs:
160            self.update(kwargs)

D.update([E, ]**F) -> None. Update D from dict/iterable E and F. If E is present and has a .keys() method, then does: for k in E: D[k] = E[k] If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v In either case, this is followed by: for k in F: D[k] = F[k]

def get( self, key: ~_Key, default: Union[~_Value, ~_T, NoneType] = None) -> Union[~_Value, ~_T, NoneType]:
162    def get(self, key: _Key, default: Optional[Union[_Value, _T]] = None) -> Union[_Value, _T, None]:  # type: ignore[override]
163        try:
164            return self[key]
165        except KeyError:
166            return default

Return the value for key if key is in the dictionary, else default.

def to_dict(self) -> Dict[~_Key, ~_Value]:
182    def to_dict(self) -> Dict[_Key, _Value]:
183        """Convert a SON document to a normal Python dictionary instance.
184
185        This is trickier than just *dict(...)* because it needs to be
186        recursive.
187        """
188
189        def transform_value(value: Any) -> Any:
190            if isinstance(value, list):
191                return [transform_value(v) for v in value]
192            elif isinstance(value, _Mapping):
193                return dict([(k, transform_value(v)) for k, v in value.items()])
194            else:
195                return value
196
197        return transform_value(dict(self))

Convert a SON document to a normal Python dictionary instance.

This is trickier than just dict(...) because it needs to be recursive.

Inherited Members
builtins.dict
keys
items
fromkeys
class Timestamp:
 29class Timestamp(object):
 30    """MongoDB internal timestamps used in the opLog."""
 31
 32    __slots__ = ("__time", "__inc")
 33
 34    __getstate__ = _getstate_slots
 35    __setstate__ = _setstate_slots
 36
 37    _type_marker = 17
 38
 39    def __init__(self, time: Union[datetime.datetime, int], inc: int) -> None:
 40        """Create a new :class:`Timestamp`.
 41
 42        This class is only for use with the MongoDB opLog. If you need
 43        to store a regular timestamp, please use a
 44        :class:`~datetime.datetime`.
 45
 46        Raises :class:`TypeError` if `time` is not an instance of
 47        :class: `int` or :class:`~datetime.datetime`, or `inc` is not
 48        an instance of :class:`int`. Raises :class:`ValueError` if
 49        `time` or `inc` is not in [0, 2**32).
 50
 51        :Parameters:
 52          - `time`: time in seconds since epoch UTC, or a naive UTC
 53            :class:`~datetime.datetime`, or an aware
 54            :class:`~datetime.datetime`
 55          - `inc`: the incrementing counter
 56        """
 57        if isinstance(time, datetime.datetime):
 58            offset = time.utcoffset()
 59            if offset is not None:
 60                time = time - offset
 61            time = int(calendar.timegm(time.timetuple()))
 62        if not isinstance(time, int):
 63            raise TypeError("time must be an instance of int")
 64        if not isinstance(inc, int):
 65            raise TypeError("inc must be an instance of int")
 66        if not 0 <= time < UPPERBOUND:
 67            raise ValueError("time must be contained in [0, 2**32)")
 68        if not 0 <= inc < UPPERBOUND:
 69            raise ValueError("inc must be contained in [0, 2**32)")
 70
 71        self.__time = time
 72        self.__inc = inc
 73
 74    @property
 75    def time(self) -> int:
 76        """Get the time portion of this :class:`Timestamp`."""
 77        return self.__time
 78
 79    @property
 80    def inc(self) -> int:
 81        """Get the inc portion of this :class:`Timestamp`."""
 82        return self.__inc
 83
 84    def __eq__(self, other: Any) -> bool:
 85        if isinstance(other, Timestamp):
 86            return self.__time == other.time and self.__inc == other.inc
 87        else:
 88            return NotImplemented
 89
 90    def __hash__(self) -> int:
 91        return hash(self.time) ^ hash(self.inc)
 92
 93    def __ne__(self, other: Any) -> bool:
 94        return not self == other
 95
 96    def __lt__(self, other: Any) -> bool:
 97        if isinstance(other, Timestamp):
 98            return (self.time, self.inc) < (other.time, other.inc)
 99        return NotImplemented
100
101    def __le__(self, other: Any) -> bool:
102        if isinstance(other, Timestamp):
103            return (self.time, self.inc) <= (other.time, other.inc)
104        return NotImplemented
105
106    def __gt__(self, other: Any) -> bool:
107        if isinstance(other, Timestamp):
108            return (self.time, self.inc) > (other.time, other.inc)
109        return NotImplemented
110
111    def __ge__(self, other: Any) -> bool:
112        if isinstance(other, Timestamp):
113            return (self.time, self.inc) >= (other.time, other.inc)
114        return NotImplemented
115
116    def __repr__(self):
117        return "Timestamp(%s, %s)" % (self.__time, self.__inc)
118
119    def as_datetime(self) -> datetime.datetime:
120        """Return a :class:`~datetime.datetime` instance corresponding
121        to the time portion of this :class:`Timestamp`.
122
123        The returned datetime's timezone is UTC.
124        """
125        return datetime.datetime.fromtimestamp(self.__time, utc)

MongoDB internal timestamps used in the opLog.

Timestamp(time: Union[datetime.datetime, int], inc: int)
39    def __init__(self, time: Union[datetime.datetime, int], inc: int) -> None:
40        """Create a new :class:`Timestamp`.
41
42        This class is only for use with the MongoDB opLog. If you need
43        to store a regular timestamp, please use a
44        :class:`~datetime.datetime`.
45
46        Raises :class:`TypeError` if `time` is not an instance of
47        :class: `int` or :class:`~datetime.datetime`, or `inc` is not
48        an instance of :class:`int`. Raises :class:`ValueError` if
49        `time` or `inc` is not in [0, 2**32).
50
51        :Parameters:
52          - `time`: time in seconds since epoch UTC, or a naive UTC
53            :class:`~datetime.datetime`, or an aware
54            :class:`~datetime.datetime`
55          - `inc`: the incrementing counter
56        """
57        if isinstance(time, datetime.datetime):
58            offset = time.utcoffset()
59            if offset is not None:
60                time = time - offset
61            time = int(calendar.timegm(time.timetuple()))
62        if not isinstance(time, int):
63            raise TypeError("time must be an instance of int")
64        if not isinstance(inc, int):
65            raise TypeError("inc must be an instance of int")
66        if not 0 <= time < UPPERBOUND:
67            raise ValueError("time must be contained in [0, 2**32)")
68        if not 0 <= inc < UPPERBOUND:
69            raise ValueError("inc must be contained in [0, 2**32)")
70
71        self.__time = time
72        self.__inc = inc

Create a new Timestamp.

This class is only for use with the MongoDB opLog. If you need to store a regular timestamp, please use a ~datetime.datetime.

Raises TypeError if time is not an instance of int or ~datetime.datetime, or inc is not an instance of int. Raises ValueError if time or inc is not in [0, 2**32).

:Parameters:

  • time: time in seconds since epoch UTC, or a naive UTC ~datetime.datetime, or an aware ~datetime.datetime
  • inc: the incrementing counter
time: int

Get the time portion of this Timestamp.

inc: int

Get the inc portion of this Timestamp.

def as_datetime(self) -> datetime.datetime:
    def as_datetime(self) -> datetime.datetime:
        """Return a :class:`~datetime.datetime` instance corresponding
        to the time portion of this :class:`Timestamp`.

        The stored time value is passed to
        :meth:`datetime.datetime.fromtimestamp` together with the
        module-level ``utc`` tzinfo, so the returned datetime is
        timezone-aware and its timezone is UTC. The ``inc`` counter plays
        no part in the result.
        """
        return datetime.datetime.fromtimestamp(self.__time, utc)

Return a ~datetime.datetime instance corresponding to the time portion of this Timestamp.

The returned datetime's timezone is UTC.

utc = <xtquant.xtbson.bson37.tz_util.FixedOffset object>
EPOCH_AWARE = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=<xtquant.xtbson.bson37.tz_util.FixedOffset object>)
EPOCH_NAIVE = datetime.datetime(1970, 1, 1, 0, 0)
BSONNUM = b'\x01'
BSONSTR = b'\x02'
BSONOBJ = b'\x03'
BSONARR = b'\x04'
BSONBIN = b'\x05'
BSONUND = b'\x06'
BSONOID = b'\x07'
BSONBOO = b'\x08'
BSONDAT = b'\t'
BSONNUL = b'\n'
BSONRGX = b'\x0b'
BSONREF = b'\x0c'
BSONCOD = b'\r'
BSONSYM = b'\x0e'
BSONCWS = b'\x0f'
BSONINT = b'\x10'
BSONTIM = b'\x11'
BSONLON = b'\x12'
BSONDEC = b'\x13'
BSONMIN = b'\xff'
BSONMAX = b'\x7f'
def get_data_and_view(data: Any) -> Tuple[Any, memoryview]:
def get_data_and_view(data: Any) -> Tuple[Any, memoryview]:
    """Return ``data`` paired with a :class:`memoryview` over it.

    ``bytes`` and ``bytearray`` inputs are returned as-is next to their
    view. Any other buffer-protocol object is copied into a new ``bytes``
    object (via ``memoryview.tobytes``) and returned next to the view of
    the original object.
    """
    view = memoryview(data)
    if isinstance(data, (bytes, bytearray)):
        return data, view
    return view.tobytes(), view
def gen_list_name() -> Generator[bytes, NoneType, NoneType]:
def gen_list_name() -> Generator[bytes, None, None]:
    r"""Generate the element "keys" used when encoding a list.

    Yields ``b"0\x00"``, ``b"1\x00"``, ``b"2\x00"``, ... without end: each
    key is the decimal index, UTF-8 encoded, followed by a NUL byte.

    The leading keys are served from the pre-built ``_LIST_NAMES`` cache.
    Once the cache is exhausted, keys are generated on the fly starting
    at index 1000.
    """
    # NOTE: the docstring is a raw string on purpose. In a regular string
    # the ``\x00`` escapes would embed literal NUL bytes in ``__doc__`` and
    # the documented sequence would render as b"0", b"1", ...
    for cached_name in _LIST_NAMES:
        yield cached_name

    for index in itertools.count(1000):
        yield (str(index) + "\x00").encode("utf8")

Generate "keys" for encoded lists in the sequence b"0\x00", b"1\x00", b"2\x00", ...

The first 1000 keys are returned from a pre-built cache. All subsequent keys are generated on the fly.

def encode( document: Mapping[str, Any], check_keys: bool = False, codec_options: xtquant.xtbson.bson37.CodecOptions = CodecOptions(document_class=dict, tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)) -> bytes:
 994def encode(
 995    document: _DocumentIn,
 996    check_keys: bool = False,
 997    codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
 998) -> bytes:
 999    """Encode a document to BSON.
1000
1001    A document can be any mapping type (like :class:`dict`).
1002
1003    Raises :class:`TypeError` if `document` is not a mapping type,
1004    or contains keys that are not instances of
1005    :class:`basestring` (:class:`str` in python 3). Raises
1006    :class:`~bson.errors.InvalidDocument` if `document` cannot be
1007    converted to :class:`BSON`.
1008
1009    :Parameters:
1010      - `document`: mapping type representing a document
1011      - `check_keys` (optional): check if keys start with '$' or
1012        contain '.', raising :class:`~bson.errors.InvalidDocument` in
1013        either case
1014      - `codec_options` (optional): An instance of
1015        :class:`~bson.codec_options.CodecOptions`.
1016
1017    .. versionadded:: 3.9
1018    """
1019    if not isinstance(codec_options, CodecOptions):
1020        raise _CODEC_OPTIONS_TYPE_ERROR
1021
1022    return _dict_to_bson(document, check_keys, codec_options)

Encode a document to BSON.

A document can be any mapping type (like dict).

Raises TypeError if document is not a mapping type, or contains keys that are not instances of basestring (str in python 3). Raises ~bson.errors.InvalidDocument if document cannot be converted to BSON.

:Parameters:

  • document: mapping type representing a document
  • check_keys (optional): check if keys start with '$' or contain '.', raising ~bson.errors.InvalidDocument in either case
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

New in version 3.9.

def decode( data: Union[bytes, memoryview, mmap.mmap, array.array], codec_options: Optional[xtquant.xtbson.bson37.CodecOptions[~_DocumentType]] = None) -> ~_DocumentType:
def decode(
    data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> _DocumentType:
    """Deserialize a single BSON document.

    The result is a Python :class:`dict` unless a different
    :class:`MutableMapping` class is requested through
    :class:`~bson.codec_options.CodecOptions`::

        >>> import collections  # From Python standard library.
        >>> import bson
        >>> from bson.codec_options import CodecOptions
        >>> data = bson.encode({'a': 1})
        >>> decoded_doc = bson.decode(data)
        >>> type(decoded_doc)
        <class 'dict'>
        >>> options = CodecOptions(document_class=collections.OrderedDict)
        >>> decoded_doc = bson.decode(data, codec_options=options)
        >>> type(decoded_doc)
        <class 'collections.OrderedDict'>

    :Parameters:
      - `data`: the BSON to decode. Any bytes-like object that implements
        the buffer protocol.
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`. When omitted (or
        falsy) the module default options are used.

    .. versionadded:: 3.9
    """
    opts: CodecOptions = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if isinstance(opts, CodecOptions):
        return _bson_to_dict(data, opts)

    raise _CODEC_OPTIONS_TYPE_ERROR

Decode BSON to a document.

By default, returns a BSON document represented as a Python dict. To use a different MutableMapping class, configure a ~bson.codec_options.CodecOptions::

>>> import collections  # From Python standard library.
>>> import bson
>>> from .codec_options import CodecOptions
>>> data = bson.encode({'a': 1})
>>> decoded_doc = bson.decode(data)
>>> type(decoded_doc)
<class 'dict'>
>>> options = CodecOptions(document_class=collections.OrderedDict)
>>> decoded_doc = bson.decode(data, codec_options=options)
>>> type(decoded_doc)
<class 'collections.OrderedDict'>

:Parameters:

  • data: the BSON to decode. Any bytes-like object that implements the buffer protocol.
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

New in version 3.9.

def decode_all( data: Union[bytes, memoryview, mmap.mmap, array.array], codec_options: Optional[xtquant.xtbson.bson37.CodecOptions[~_DocumentType]] = None) -> List[~_DocumentType]:
def decode_all(
    data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> List[_DocumentType]:
    """Deserialize a buffer holding several BSON documents back to back.

    `data` must be a bytes-like object implementing the buffer protocol that
    provides concatenated, valid, BSON-encoded documents.

    :Parameters:
      - `data`: BSON data
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`. When omitted (or
        falsy) the module default options are used.

    .. versionchanged:: 3.9
       Supports bytes-like objects that implement the buffer protocol.

    .. versionchanged:: 3.0
       Removed `compile_re` option: PyMongo now always represents BSON regular
       expressions as :class:`~bson.regex.Regex` objects. Use
       :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
       BSON regular expression to a Python regular expression object.

       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.
    """
    opts = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if isinstance(opts, CodecOptions):
        return _decode_all(data, opts)  # type: ignore[arg-type]

    raise _CODEC_OPTIONS_TYPE_ERROR

Decode BSON data to multiple documents.

data must be a bytes-like object implementing the buffer protocol that provides concatenated, valid, BSON-encoded documents.

:Parameters:

  • data: BSON data
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

Changed in version 3.9: Supports bytes-like objects that implement the buffer protocol.

Changed in version 3.0: Removed compile_re option: PyMongo now always represents BSON regular expressions as ~bson.regex.Regex objects. Use ~bson.regex.Regex.try_compile() to attempt to convert from a BSON regular expression to a Python regular expression object.

Replaced as_class, tz_aware, and uuid_subtype options with codec_options.

def decode_iter( data: bytes, codec_options: Optional[xtquant.xtbson.bson37.CodecOptions[~_DocumentType]] = None) -> Iterator[~_DocumentType]:
def decode_iter(
    data: bytes, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> Iterator[_DocumentType]:
    """Lazily decode concatenated BSON documents, one per iteration.

    Works similarly to the decode_all function, but yields one document at a
    time.

    `data` must be a string of concatenated, valid, BSON-encoded
    documents.

    :Parameters:
      - `data`: BSON data
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    .. versionchanged:: 3.0
       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionadded:: 2.8
    """
    opts = codec_options if codec_options else DEFAULT_CODEC_OPTIONS
    if not isinstance(opts, CodecOptions):
        raise _CODEC_OPTIONS_TYPE_ERROR

    offset = 0
    last_index = len(data) - 1
    while offset < last_index:
        # Every document begins with its own total size.
        doc_size = _UNPACK_INT_FROM(data, offset)[0]
        doc_end = offset + doc_size
        raw_doc = data[offset:doc_end]
        # Advance before decoding so the cursor already points at the next
        # document when the generator is resumed.
        offset = doc_end

        yield _bson_to_dict(raw_doc, opts)

Decode BSON data to multiple documents as a generator.

Works similarly to the decode_all function, but yields one document at a time.

data must be a string of concatenated, valid, BSON-encoded documents.

:Parameters:

  • data: BSON data
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

Changed in version 3.0: Replaced as_class, tz_aware, and uuid_subtype options with codec_options.

New in version 2.8.

def decode_file_iter( file_obj: Union[BinaryIO, IO], codec_options: Optional[xtquant.xtbson.bson37.CodecOptions[~_DocumentType]] = None) -> Iterator[~_DocumentType]:
def decode_file_iter(
    file_obj: Union[BinaryIO, IO], codec_options: "Optional[CodecOptions[_DocumentType]]" = None
) -> Iterator[_DocumentType]:
    """Decode bson data from a file to multiple documents as a generator.

    Works similarly to the decode_all function, but reads from the file object
    in chunks and parses bson in chunks, yielding one document at a time.

    Raises :class:`TypeError` if `codec_options` is not a
    :class:`~bson.codec_options.CodecOptions` instance, and
    :class:`~bson.errors.InvalidBSON` if the file ends part-way through a
    document's 4-byte size prefix. Because this is a generator, both are
    raised on iteration rather than at call time.

    :Parameters:
      - `file_obj`: A file object containing BSON data.
      - `codec_options` (optional): An instance of
        :class:`~bson.codec_options.CodecOptions`.

    .. versionchanged:: 3.0
       Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
       `codec_options`.

    .. versionadded:: 2.8
    """
    opts = codec_options or DEFAULT_CODEC_OPTIONS
    # Same validation as decode(), decode_all() and decode_iter(): reject a
    # wrong options type explicitly instead of failing inside the decoder.
    if not isinstance(opts, CodecOptions):
        raise _CODEC_OPTIONS_TYPE_ERROR

    while True:
        # Read size of next object.
        size_data = file_obj.read(4)
        if not size_data:
            break  # Finished with file normally.
        if len(size_data) != 4:
            raise InvalidBSON("cut off in middle of objsize")
        # The declared size includes the 4 prefix bytes already consumed.
        obj_size = _UNPACK_INT_FROM(size_data, 0)[0] - 4
        # max() guards against a negative read length on a malformed size.
        elements = size_data + file_obj.read(max(0, obj_size))
        yield _bson_to_dict(elements, opts)

Decode bson data from a file to multiple documents as a generator.

Works similarly to the decode_all function, but reads from the file object in chunks and parses bson in chunks, yielding one document at a time.

:Parameters:

  • file_obj: A file object containing BSON data.
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

Changed in version 3.0: Replaced as_class, tz_aware, and uuid_subtype options with codec_options.

New in version 2.8.

def is_valid(bson: bytes) -> bool:
def is_valid(bson: bytes) -> bool:
    """Report whether `bson` holds valid :class:`BSON` data.

    Validation is done by attempting a full decode with the default codec
    options; any exception raised by the decoder is treated as "invalid".

    Raises :class:`TypeError` if `bson` is not an instance of
    :class:`bytes`. Returns ``True`` if `bson` is valid :class:`BSON`,
    ``False`` otherwise.

    :Parameters:
      - `bson`: the data to be validated
    """
    if not isinstance(bson, bytes):
        raise TypeError("BSON data must be an instance of a subclass of bytes")

    try:
        _bson_to_dict(bson, DEFAULT_CODEC_OPTIONS)
    except Exception:
        # Deliberately broad: every decoding failure means "not valid".
        return False
    return True

Check that the given string represents valid BSON data.

Raises TypeError if bson is not an instance of str (bytes in python 3). Returns True if bson is valid BSON, False otherwise.

:Parameters:

  • bson: the data to be validated
class BSON(builtins.bytes):
class BSON(bytes):
    """BSON (Binary JSON) data.

    .. warning:: Using this class to encode and decode BSON adds a performance
       cost. For better performance use the module level functions
       :func:`encode` and :func:`decode` instead.
    """

    @classmethod
    def encode(
        cls: Type["BSON"],
        document: _DocumentIn,
        check_keys: bool = False,
        codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
    ) -> "BSON":
        """Serialize a document and wrap the result in a new :class:`BSON`.

        Any mapping type (such as :class:`dict`) is accepted as the document.
        The work is delegated to the module-level :func:`encode`.

        Raises :class:`TypeError` if `document` is not a mapping type,
        or contains keys that are not instances of :class:`str`. Raises
        :class:`~bson.errors.InvalidDocument` if `document` cannot be
        converted to :class:`BSON`.

        :Parameters:
          - `document`: mapping type representing a document
          - `check_keys` (optional): check if keys start with '$' or
            contain '.', raising :class:`~bson.errors.InvalidDocument` in
            either case
          - `codec_options` (optional): An instance of
            :class:`~bson.codec_options.CodecOptions`.

        .. versionchanged:: 3.0
           Replaced `uuid_subtype` option with `codec_options`.
        """
        raw_bytes = encode(document, check_keys, codec_options)
        return cls(raw_bytes)

    def decode(self, codec_options: "CodecOptions[_DocumentType]" = DEFAULT_CODEC_OPTIONS) -> _DocumentType:  # type: ignore[override,assignment]
        """Deserialize this BSON data into a document.

        The result is a Python :class:`dict` unless a different
        :class:`MutableMapping` class is requested through
        :class:`~bson.codec_options.CodecOptions`::

            >>> import collections  # From Python standard library.
            >>> import bson
            >>> from bson.codec_options import CodecOptions
            >>> data = bson.BSON.encode({'a': 1})
            >>> decoded_doc = bson.BSON(data).decode()
            >>> type(decoded_doc)
            <class 'dict'>
            >>> options = CodecOptions(document_class=collections.OrderedDict)
            >>> decoded_doc = bson.BSON(data).decode(codec_options=options)
            >>> type(decoded_doc)
            <class 'collections.OrderedDict'>

        :Parameters:
          - `codec_options` (optional): An instance of
            :class:`~bson.codec_options.CodecOptions`.

        .. versionchanged:: 3.0
           Removed `compile_re` option: PyMongo now always represents BSON
           regular expressions as :class:`~bson.regex.Regex` objects. Use
           :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
           BSON regular expression to a Python regular expression object.

           Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
           `codec_options`.
        """
        # Delegates to the module-level decode(); this method shadows
        # bytes.decode, hence the type-ignore on the signature.
        document = decode(self, codec_options)
        return document

BSON (Binary JSON) data.

Warning: Using this class to encode and decode BSON adds a performance cost. For better performance use the module level functions encode() and decode() instead.

@classmethod
def encode( cls: Type[xtquant.xtbson.bson37.BSON], document: Mapping[str, Any], check_keys: bool = False, codec_options: xtquant.xtbson.bson37.CodecOptions = CodecOptions(document_class=dict, tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)) -> xtquant.xtbson.bson37.BSON:
1326    @classmethod
1327    def encode(
1328        cls: Type["BSON"],
1329        document: _DocumentIn,
1330        check_keys: bool = False,
1331        codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS,
1332    ) -> "BSON":
1333        """Encode a document to a new :class:`BSON` instance.
1334
1335        A document can be any mapping type (like :class:`dict`).
1336
1337        Raises :class:`TypeError` if `document` is not a mapping type,
1338        or contains keys that are not instances of
1339        :class:`basestring` (:class:`str` in python 3). Raises
1340        :class:`~bson.errors.InvalidDocument` if `document` cannot be
1341        converted to :class:`BSON`.
1342
1343        :Parameters:
1344          - `document`: mapping type representing a document
1345          - `check_keys` (optional): check if keys start with '$' or
1346            contain '.', raising :class:`~bson.errors.InvalidDocument` in
1347            either case
1348          - `codec_options` (optional): An instance of
1349            :class:`~bson.codec_options.CodecOptions`.
1350
1351        .. versionchanged:: 3.0
1352           Replaced `uuid_subtype` option with `codec_options`.
1353        """
1354        return cls(encode(document, check_keys, codec_options))

Encode a document to a new BSON instance.

A document can be any mapping type (like dict).

Raises TypeError if document is not a mapping type, or contains keys that are not instances of basestring (str in python 3). Raises ~bson.errors.InvalidDocument if document cannot be converted to BSON.

:Parameters:

  • document: mapping type representing a document
  • check_keys (optional): check if keys start with '$' or contain '.', raising ~bson.errors.InvalidDocument in either case
  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

Changed in version 3.0: Replaced uuid_subtype option with codec_options.

def decode( self, codec_options: xtquant.xtbson.bson37.CodecOptions[~_DocumentType] = CodecOptions(document_class=dict, tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)) -> ~_DocumentType:
1356    def decode(self, codec_options: "CodecOptions[_DocumentType]" = DEFAULT_CODEC_OPTIONS) -> _DocumentType:  # type: ignore[override,assignment]
1357        """Decode this BSON data.
1358
1359        By default, returns a BSON document represented as a Python
1360        :class:`dict`. To use a different :class:`MutableMapping` class,
1361        configure a :class:`~bson.codec_options.CodecOptions`::
1362
1363            >>> import collections  # From Python standard library.
1364            >>> import bson
1365            >>> from .codec_options import CodecOptions
1366            >>> data = bson.BSON.encode({'a': 1})
1367            >>> decoded_doc = bson.BSON(data).decode()
1368            <type 'dict'>
1369            >>> options = CodecOptions(document_class=collections.OrderedDict)
1370            >>> decoded_doc = bson.BSON(data).decode(codec_options=options)
1371            >>> type(decoded_doc)
1372            <class 'collections.OrderedDict'>
1373
1374        :Parameters:
1375          - `codec_options` (optional): An instance of
1376            :class:`~bson.codec_options.CodecOptions`.
1377
1378        .. versionchanged:: 3.0
1379           Removed `compile_re` option: PyMongo now always represents BSON
1380           regular expressions as :class:`~bson.regex.Regex` objects. Use
1381           :meth:`~bson.regex.Regex.try_compile` to attempt to convert from a
1382           BSON regular expression to a Python regular expression object.
1383
1384           Replaced `as_class`, `tz_aware`, and `uuid_subtype` options with
1385           `codec_options`.
1386        """
1387        return decode(self, codec_options)

Decode this BSON data.

By default, returns a BSON document represented as a Python dict. To use a different MutableMapping class, configure a ~bson.codec_options.CodecOptions::

>>> import collections  # From Python standard library.
>>> import bson
>>> from .codec_options import CodecOptions
>>> data = bson.BSON.encode({'a': 1})
>>> decoded_doc = bson.BSON(data).decode()
>>> type(decoded_doc)
<class 'dict'>
>>> options = CodecOptions(document_class=collections.OrderedDict)
>>> decoded_doc = bson.BSON(data).decode(codec_options=options)
>>> type(decoded_doc)
<class 'collections.OrderedDict'>

:Parameters:

  • codec_options (optional): An instance of ~bson.codec_options.CodecOptions.

Changed in version 3.0: Removed compile_re option: PyMongo now always represents BSON regular expressions as ~bson.regex.Regex objects. Use ~bson.regex.Regex.try_compile() to attempt to convert from a BSON regular expression to a Python regular expression object.

Replaced as_class, tz_aware, and uuid_subtype options with codec_options.

Inherited Members
builtins.bytes
capitalize
center
count
endswith
expandtabs
find
fromhex
hex
index
isalnum
isalpha
isascii
isdigit
islower
isspace
istitle
isupper
join
ljust
lower
lstrip
maketrans
partition
replace
removeprefix
removesuffix
rfind
rindex
rjust
rpartition
rsplit
rstrip
split
splitlines
startswith
strip
swapcase
title
translate
upper
zfill
def has_c() -> bool:
def has_c() -> bool:
    """Is the C extension installed?

    Returns the module-level ``_USE_C`` flag unchanged; nothing is probed
    or imported at call time.
    """
    return _USE_C

Is the C extension installed?

class DatetimeConversion(builtins.int, enum.Enum):
class DatetimeConversion(int, enum.Enum):
    """Options for decoding BSON datetimes.

    Members are also :class:`int` instances, so each compares equal to its
    numeric value.
    """

    # NOTE: the bare strings after each member are attribute docstrings read
    # by documentation tools. The Sphinx directive needs the ``::`` suffix;
    # without it the line is shown as raw text instead of a version note.

    DATETIME = 1
    """Decode a BSON UTC datetime as a :class:`datetime.datetime`.

    BSON UTC datetimes that cannot be represented as a
    :class:`~datetime.datetime` will raise an :class:`OverflowError`
    or a :class:`ValueError`.

    .. versionadded:: 4.3
    """

    DATETIME_CLAMP = 2
    """Decode a BSON UTC datetime as a :class:`datetime.datetime`, clamping
    to :attr:`~datetime.datetime.min` and :attr:`~datetime.datetime.max`.

    .. versionadded:: 4.3
    """

    DATETIME_MS = 3
    """Decode a BSON UTC datetime as a :class:`~bson.datetime_ms.DatetimeMS`
    object.

    .. versionadded:: 4.3
    """

    DATETIME_AUTO = 4
    """Decode a BSON UTC datetime as a :class:`datetime.datetime` if possible,
    and a :class:`~bson.datetime_ms.DatetimeMS` if not.

    .. versionadded:: 4.3
    """

Options for decoding BSON datetimes.

DATETIME = <DatetimeConversion.DATETIME: 1>

Decode a BSON UTC datetime as a datetime.datetime.

BSON UTC datetimes that cannot be represented as a ~datetime.datetime will raise an OverflowError or a ValueError.

New in version 4.3.

DATETIME_CLAMP = <DatetimeConversion.DATETIME_CLAMP: 2>

Decode a BSON UTC datetime as a datetime.datetime, clamping to ~datetime.datetime.min and ~datetime.datetime.max.

New in version 4.3.

DATETIME_MS = <DatetimeConversion.DATETIME_MS: 3>

Decode a BSON UTC datetime as a ~bson.datetime_ms.DatetimeMS object.

New in version 4.3.

DATETIME_AUTO = <DatetimeConversion.DATETIME_AUTO: 4>

Decode a BSON UTC datetime as a datetime.datetime if possible, and a ~bson.datetime_ms.DatetimeMS if not.

New in version 4.3.

Inherited Members
enum.Enum
name
value
builtins.int
conjugate
bit_length
bit_count
to_bytes
from_bytes
as_integer_ratio
real
imag
numerator
denominator
class DatetimeMS:
 33class DatetimeMS:
 34    """Represents a BSON UTC datetime."""
 35
 36    __slots__ = ("_value",)
 37
 38    def __init__(self, value: Union[int, datetime.datetime]):
 39        """Represents a BSON UTC datetime.
 40
 41        BSON UTC datetimes are defined as an int64 of milliseconds since the
 42        Unix epoch. The principal use of DatetimeMS is to represent
 43        datetimes outside the range of the Python builtin
 44        :class:`~datetime.datetime` class when
 45        encoding/decoding BSON.
 46
 47        To decode UTC datetimes as a ``DatetimeMS``, `datetime_conversion` in
 48        :class:`~bson.CodecOptions` must be set to 'datetime_ms' or
 49        'datetime_auto'. See :ref:`handling-out-of-range-datetimes` for
 50        details.
 51
 52        :Parameters:
 53          - `value`: An instance of :class:`datetime.datetime` to be
 54            represented as milliseconds since the Unix epoch, or int of
 55            milliseconds since the Unix epoch.
 56        """
 57        if isinstance(value, int):
 58            if not (-(2**63) <= value <= 2**63 - 1):
 59                raise OverflowError("Must be a 64-bit integer of milliseconds")
 60            self._value = value
 61        elif isinstance(value, datetime.datetime):
 62            self._value = _datetime_to_millis(value)
 63        else:
 64            raise TypeError(f"{type(value)} is not a valid type for DatetimeMS")
 65
 66    def __hash__(self) -> int:
 67        return hash(self._value)
 68
 69    def __repr__(self) -> str:
 70        return type(self).__name__ + "(" + str(self._value) + ")"
 71
 72    def __lt__(self, other: Union["DatetimeMS", int]) -> bool:
 73        return self._value < other
 74
 75    def __le__(self, other: Union["DatetimeMS", int]) -> bool:
 76        return self._value <= other
 77
 78    def __eq__(self, other: Any) -> bool:
 79        if isinstance(other, DatetimeMS):
 80            return self._value == other._value
 81        return False
 82
 83    def __ne__(self, other: Any) -> bool:
 84        if isinstance(other, DatetimeMS):
 85            return self._value != other._value
 86        return True
 87
 88    def __gt__(self, other: Union["DatetimeMS", int]) -> bool:
 89        return self._value > other
 90
 91    def __ge__(self, other: Union["DatetimeMS", int]) -> bool:
 92        return self._value >= other
 93
 94    _type_marker = 9
 95
 96    def as_datetime(self, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS) -> datetime.datetime:
 97        """Create a Python :class:`~datetime.datetime` from this DatetimeMS object.
 98
 99        :Parameters:
100          - `codec_options`: A CodecOptions instance for specifying how the
101            resulting DatetimeMS object will be formatted using ``tz_aware``
102            and ``tz_info``. Defaults to
103            :const:`~bson.codec_options.DEFAULT_CODEC_OPTIONS`.
104        """
105        return cast(datetime.datetime, _millis_to_datetime(self._value, codec_options))
106
107    def __int__(self) -> int:
108        return self._value

Represents a BSON UTC datetime.

DatetimeMS(value: Union[int, datetime.datetime])
38    def __init__(self, value: Union[int, datetime.datetime]):
39        """Represents a BSON UTC datetime.
40
41        BSON UTC datetimes are defined as an int64 of milliseconds since the
42        Unix epoch. The principal use of DatetimeMS is to represent
43        datetimes outside the range of the Python builtin
44        :class:`~datetime.datetime` class when
45        encoding/decoding BSON.
46
47        To decode UTC datetimes as a ``DatetimeMS``, `datetime_conversion` in
48        :class:`~bson.CodecOptions` must be set to 'datetime_ms' or
49        'datetime_auto'. See :ref:`handling-out-of-range-datetimes` for
50        details.
51
52        :Parameters:
53          - `value`: An instance of :class:`datetime.datetime` to be
54            represented as milliseconds since the Unix epoch, or int of
55            milliseconds since the Unix epoch.
56        """
57        if isinstance(value, int):
58            if not (-(2**63) <= value <= 2**63 - 1):
59                raise OverflowError("Must be a 64-bit integer of milliseconds")
60            self._value = value
61        elif isinstance(value, datetime.datetime):
62            self._value = _datetime_to_millis(value)
63        else:
64            raise TypeError(f"{type(value)} is not a valid type for DatetimeMS")

Represents a BSON UTC datetime.

BSON UTC datetimes are defined as an int64 of milliseconds since the Unix epoch. The principal use of DatetimeMS is to represent datetimes outside the range of the Python builtin ~datetime.datetime class when encoding/decoding BSON.

To decode UTC datetimes as a DatetimeMS, datetime_conversion in ~bson.CodecOptions must be set to 'datetime_ms' or 'datetime_auto'. See :ref:handling-out-of-range-datetimes for details.

:Parameters:

  • value: An instance of datetime.datetime to be represented as milliseconds since the Unix epoch, or int of milliseconds since the Unix epoch.
def as_datetime( self, codec_options: xtquant.xtbson.bson37.CodecOptions = CodecOptions(document_class=dict, tz_aware=False, uuid_representation=UuidRepresentation.UNSPECIFIED, unicode_decode_error_handler='strict', tzinfo=None, type_registry=TypeRegistry(type_codecs=[], fallback_encoder=None), datetime_conversion=DatetimeConversion.DATETIME)) -> datetime.datetime:
 96    def as_datetime(self, codec_options: CodecOptions = DEFAULT_CODEC_OPTIONS) -> datetime.datetime:
 97        """Create a Python :class:`~datetime.datetime` from this DatetimeMS object.
 98
 99        :Parameters:
100          - `codec_options`: A CodecOptions instance for specifying how the
101            resulting DatetimeMS object will be formatted using ``tz_aware``
102            and ``tz_info``. Defaults to
103            :const:`~bson.codec_options.DEFAULT_CODEC_OPTIONS`.
104        """
105        return cast(datetime.datetime, _millis_to_datetime(self._value, codec_options))

Create a Python ~datetime.datetime from this DatetimeMS object.

:Parameters:

  • codec_options: A CodecOptions instance for specifying how the resulting datetime will be formatted using tz_aware and tzinfo. Defaults to ~bson.codec_options.DEFAULT_CODEC_OPTIONS.