From 1fbe83bedcad75924a202645014a97dcf109d4ea Mon Sep 17 00:00:00 2001 From: Edgars Irmejs Date: Thu, 17 Nov 2022 10:23:01 +0200 Subject: [PATCH 1/3] Fix place where 'mini' type was invalid --- docs/performance.rst | 2 +- simdjson/__init__.pyi | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/performance.rst b/docs/performance.rst index 2d12b9c..d22b06a 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -36,7 +36,7 @@ Both of these approaches will be much faster than using `load/s()`, since they avoid loading the parts of the document we didn't care about. Both `Object` and `Array` have a `mini` property that returns their entire -content as a minified Python `str`. A message router for example would only +content as a minified Python `bytes`. A message router for example would only parse the document and retrieve a single property, the destination, and forward the payload without ever turning it into a Python object. Here's a (bad) example: diff --git a/simdjson/__init__.pyi b/simdjson/__init__.pyi index d42b480..606e804 100644 --- a/simdjson/__init__.pyi +++ b/simdjson/__init__.pyi @@ -52,7 +52,7 @@ class Object(Mapping[str, SimValue]): ... @property - def mini(self) -> str: + def mini(self) -> bytes: ... @@ -73,7 +73,7 @@ class Array(Sequence[SimValue]): ... @property - def mini(self) -> str: + def mini(self) -> bytes: ... 
From 3e56243b9718043a72b92f100640b52b7bf3c348 Mon Sep 17 00:00:00 2001 From: Edgars Irmejs Date: Tue, 22 Nov 2022 00:29:34 +0200 Subject: [PATCH 2/3] Don't drop '.0' from whole number floats, when using .mini --- .gitignore | 3 + simdjson/csimdjson.cpp | 2761 +- simdjson/patches/README | 5 + simdjson/patches/apply.sh | 7 + simdjson/patches/float-aware-minify.patch | 42 + simdjson/simdjson.cpp | 12 +- simdjson/simdjson_source/README | 7 + simdjson/simdjson_source/simdjson.cpp | 15051 +++++++++ simdjson/simdjson_source/simdjson.h | 32135 ++++++++++++++++++++ tests/conftest.py | 2 + tests/test_float.py | 9 + 11 files changed, 48678 insertions(+), 1356 deletions(-) create mode 100644 simdjson/patches/README create mode 100755 simdjson/patches/apply.sh create mode 100644 simdjson/patches/float-aware-minify.patch create mode 100644 simdjson/simdjson_source/README create mode 100644 simdjson/simdjson_source/simdjson.cpp create mode 100644 simdjson/simdjson_source/simdjson.h create mode 100644 tests/test_float.py diff --git a/.gitignore b/.gitignore index f81086b..b0b248c 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ _build/ # coverage.py default output file .coverage + +# IDE cruft +.idea/ diff --git a/simdjson/csimdjson.cpp b/simdjson/csimdjson.cpp index 9145f61..6336efc 100644 --- a/simdjson/csimdjson.cpp +++ b/simdjson/csimdjson.cpp @@ -1,4 +1,4 @@ -/* Generated by Cython 0.29.30 */ +/* Generated by Cython 0.29.32 */ /* BEGIN: Cython Metadata { @@ -32,8 +32,8 @@ END: Cython Metadata */ #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) #error Cython requires Python 2.6+ or Python 3.3+. 
#else -#define CYTHON_ABI "0_29_30" -#define CYTHON_HEX_VERSION 0x001D1EF0 +#define CYTHON_ABI "0_29_32" +#define CYTHON_HEX_VERSION 0x001D20F0 #define CYTHON_FUTURE_DIVISION 1 #include #ifndef offsetof @@ -72,6 +72,7 @@ END: Cython Metadata */ #define CYTHON_COMPILING_IN_PYPY 1 #define CYTHON_COMPILING_IN_PYSTON 0 #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 0 #undef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 0 #undef CYTHON_USE_PYTYPE_LOOKUP @@ -115,6 +116,7 @@ END: Cython Metadata */ #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_PYSTON 1 #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 0 #ifndef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 1 #endif @@ -155,10 +157,56 @@ END: Cython Metadata */ #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 #endif +#elif defined(PY_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_PYSTON 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef 
CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 #else #define CYTHON_COMPILING_IN_PYPY 0 #define CYTHON_COMPILING_IN_PYSTON 0 #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_NOGIL 0 #ifndef CYTHON_USE_TYPE_SLOTS #define CYTHON_USE_TYPE_SLOTS 1 #endif @@ -955,7 +1003,7 @@ static const char *__pyx_filename; static const char *__pyx_f[] = { "simdjson/csimdjson.pyx", "stringsource", - "venv/lib/python3.10/site-packages/Cython/Includes/cpython/type.pxd", + "backend/venv/lib/python3.9/site-packages/Cython/Includes/cpython/type.pxd", }; /* MemviewSliceStruct.proto */ struct __pyx_memoryview_obj; @@ -973,30 +1021,26 @@ typedef struct { #ifndef CYTHON_ATOMICS #define CYTHON_ATOMICS 1 #endif +#define __PYX_CYTHON_ATOMICS_ENABLED() CYTHON_ATOMICS #define __pyx_atomic_int_type int -#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 ||\ - (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL >= 2)) &&\ - !defined(__i386__) - #define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1) - #define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1) +#if CYTHON_ATOMICS && (__GNUC__ >= 5 || (__GNUC__ == 4 &&\ + (__GNUC_MINOR__ > 1 ||\ + (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 2)))) + #define __pyx_atomic_incr_aligned(value) __sync_fetch_and_add(value, 1) + #define __pyx_atomic_decr_aligned(value) __sync_fetch_and_sub(value, 1) #ifdef __PYX_DEBUG_ATOMICS #warning "Using GNU atomics" #endif -#elif CYTHON_ATOMICS && defined(_MSC_VER) && 0 - #include +#elif CYTHON_ATOMICS && defined(_MSC_VER) && CYTHON_COMPILING_IN_NOGIL + #include #undef __pyx_atomic_int_type - #define __pyx_atomic_int_type LONG - #define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value) - 
#define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value) + #define __pyx_atomic_int_type long + #pragma intrinsic (_InterlockedExchangeAdd) + #define __pyx_atomic_incr_aligned(value) _InterlockedExchangeAdd(value, 1) + #define __pyx_atomic_decr_aligned(value) _InterlockedExchangeAdd(value, -1) #ifdef __PYX_DEBUG_ATOMICS #pragma message ("Using MSVC atomics") #endif -#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0 - #define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value) - #define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value) - #ifdef __PYX_DEBUG_ATOMICS - #warning "Using Intel atomics" - #endif #else #undef CYTHON_ATOMICS #define CYTHON_ATOMICS 0 @@ -1007,9 +1051,9 @@ typedef struct { typedef volatile __pyx_atomic_int_type __pyx_atomic_int; #if CYTHON_ATOMICS #define __pyx_add_acquisition_count(memview)\ - __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock) + __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview)) #define __pyx_sub_acquisition_count(memview)\ - __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock) + __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview)) #else #define __pyx_add_acquisition_count(memview)\ __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock) @@ -1231,7 +1275,7 @@ struct __pyx_obj_9csimdjson___pyx_scope_struct_4_get_implementations { }; -/* "View.MemoryView":105 +/* "View.MemoryView":106 * * @cname("__pyx_array") * cdef class array: # <<<<<<<<<<<<<< @@ -1256,7 +1300,7 @@ struct __pyx_array_obj { }; -/* "View.MemoryView":279 +/* "View.MemoryView":280 * * @cname('__pyx_MemviewEnum') * cdef class Enum(object): # <<<<<<<<<<<<<< @@ -1269,7 +1313,7 @@ struct __pyx_MemviewEnum_obj { }; -/* "View.MemoryView":330 +/* "View.MemoryView":331 * * @cname('__pyx_memoryview') * cdef class memoryview(object): # <<<<<<<<<<<<<< @@ -1292,7 +1336,7 @@ struct 
__pyx_memoryview_obj { }; -/* "View.MemoryView":965 +/* "View.MemoryView":967 * * @cname('__pyx_memoryviewslice') * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<< @@ -1354,7 +1398,7 @@ static struct __pyx_vtabstruct_9csimdjson_Object *__pyx_vtabptr_9csimdjson_Objec static CYTHON_INLINE PyObject *__pyx_f_9csimdjson_6Object_from_element(struct __pyx_obj_9csimdjson_Parser *, simdjson::dom::element); -/* "View.MemoryView":105 +/* "View.MemoryView":106 * * @cname("__pyx_array") * cdef class array: # <<<<<<<<<<<<<< @@ -1368,7 +1412,7 @@ struct __pyx_vtabstruct_array { static struct __pyx_vtabstruct_array *__pyx_vtabptr_array; -/* "View.MemoryView":330 +/* "View.MemoryView":331 * * @cname('__pyx_memoryview') * cdef class memoryview(object): # <<<<<<<<<<<<<< @@ -1388,7 +1432,7 @@ struct __pyx_vtabstruct_memoryview { static struct __pyx_vtabstruct_memoryview *__pyx_vtabptr_memoryview; -/* "View.MemoryView":965 +/* "View.MemoryView":967 * * @cname('__pyx_memoryviewslice') * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<< @@ -8676,7 +8720,7 @@ static CYTHON_INLINE PyObject *__pyx_convert_PyByteArray_string_to_py_std__in_st return __pyx_r; } -/* "View.MemoryView":122 +/* "View.MemoryView":123 * cdef bint dtype_is_object * * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< @@ -8728,13 +8772,13 @@ static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, P case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_itemsize)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 1); __PYX_ERR(1, 122, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 1); __PYX_ERR(1, 123, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_format)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 2); __PYX_ERR(1, 122, __pyx_L3_error) + 
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, 2); __PYX_ERR(1, 123, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: @@ -8750,7 +8794,7 @@ static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, P } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(1, 122, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(1, 123, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -8766,14 +8810,14 @@ static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, P } } __pyx_v_shape = ((PyObject*)values[0]); - __pyx_v_itemsize = __Pyx_PyIndex_AsSsize_t(values[1]); if (unlikely((__pyx_v_itemsize == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 122, __pyx_L3_error) + __pyx_v_itemsize = __Pyx_PyIndex_AsSsize_t(values[1]); if (unlikely((__pyx_v_itemsize == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 123, __pyx_L3_error) __pyx_v_format = values[2]; __pyx_v_mode = values[3]; if (values[4]) { - __pyx_v_allocate_buffer = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_allocate_buffer == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 123, __pyx_L3_error) + __pyx_v_allocate_buffer = __Pyx_PyObject_IsTrue(values[4]); if (unlikely((__pyx_v_allocate_buffer == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 124, __pyx_L3_error) } else { - /* "View.MemoryView":123 + /* "View.MemoryView":124 * * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, * mode="c", bint allocate_buffer=True): # <<<<<<<<<<<<<< @@ -8785,19 +8829,19 @@ static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, P } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 122, __pyx_L3_error) + 
__Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 123, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("View.MemoryView.array.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return -1; __pyx_L4_argument_unpacking_done:; - if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_shape), (&PyTuple_Type), 1, "shape", 1))) __PYX_ERR(1, 122, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_shape), (&PyTuple_Type), 1, "shape", 1))) __PYX_ERR(1, 123, __pyx_L1_error) if (unlikely(((PyObject *)__pyx_v_format) == Py_None)) { - PyErr_Format(PyExc_TypeError, "Argument '%.200s' must not be None", "format"); __PYX_ERR(1, 122, __pyx_L1_error) + PyErr_Format(PyExc_TypeError, "Argument '%.200s' must not be None", "format"); __PYX_ERR(1, 123, __pyx_L1_error) } __pyx_r = __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(((struct __pyx_array_obj *)__pyx_v_self), __pyx_v_shape, __pyx_v_itemsize, __pyx_v_format, __pyx_v_mode, __pyx_v_allocate_buffer); - /* "View.MemoryView":122 + /* "View.MemoryView":123 * cdef bint dtype_is_object * * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< @@ -8839,7 +8883,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __Pyx_RefNannySetupContext("__cinit__", 0); __Pyx_INCREF(__pyx_v_format); - /* "View.MemoryView":129 + /* "View.MemoryView":130 * cdef PyObject **p * * self.ndim = len(shape) # <<<<<<<<<<<<<< @@ -8848,12 +8892,12 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ if (unlikely(__pyx_v_shape == Py_None)) { PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()"); - __PYX_ERR(1, 129, __pyx_L1_error) + __PYX_ERR(1, 130, __pyx_L1_error) } - __pyx_t_1 = PyTuple_GET_SIZE(__pyx_v_shape); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 129, __pyx_L1_error) + __pyx_t_1 = 
PyTuple_GET_SIZE(__pyx_v_shape); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 130, __pyx_L1_error) __pyx_v_self->ndim = ((int)__pyx_t_1); - /* "View.MemoryView":130 + /* "View.MemoryView":131 * * self.ndim = len(shape) * self.itemsize = itemsize # <<<<<<<<<<<<<< @@ -8862,7 +8906,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->itemsize = __pyx_v_itemsize; - /* "View.MemoryView":132 + /* "View.MemoryView":133 * self.itemsize = itemsize * * if not self.ndim: # <<<<<<<<<<<<<< @@ -8872,20 +8916,20 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_2 = ((!(__pyx_v_self->ndim != 0)) != 0); if (unlikely(__pyx_t_2)) { - /* "View.MemoryView":133 + /* "View.MemoryView":134 * * if not self.ndim: * raise ValueError("Empty shape tuple for cython.array") # <<<<<<<<<<<<<< * * if itemsize <= 0: */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__16, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 133, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__16, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 134, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 133, __pyx_L1_error) + __PYX_ERR(1, 134, __pyx_L1_error) - /* "View.MemoryView":132 + /* "View.MemoryView":133 * self.itemsize = itemsize * * if not self.ndim: # <<<<<<<<<<<<<< @@ -8894,7 +8938,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":135 + /* "View.MemoryView":136 * raise ValueError("Empty shape tuple for cython.array") * * if itemsize <= 0: # <<<<<<<<<<<<<< @@ -8904,20 +8948,20 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_2 = ((__pyx_v_itemsize <= 0) != 0); if (unlikely(__pyx_t_2)) { - /* "View.MemoryView":136 + /* "View.MemoryView":137 * * if itemsize <= 0: * raise 
ValueError("itemsize <= 0 for cython.array") # <<<<<<<<<<<<<< * * if not isinstance(format, bytes): */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 136, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__17, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 137, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 136, __pyx_L1_error) + __PYX_ERR(1, 137, __pyx_L1_error) - /* "View.MemoryView":135 + /* "View.MemoryView":136 * raise ValueError("Empty shape tuple for cython.array") * * if itemsize <= 0: # <<<<<<<<<<<<<< @@ -8926,7 +8970,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":138 + /* "View.MemoryView":139 * raise ValueError("itemsize <= 0 for cython.array") * * if not isinstance(format, bytes): # <<<<<<<<<<<<<< @@ -8937,14 +8981,14 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = ((!(__pyx_t_2 != 0)) != 0); if (__pyx_t_4) { - /* "View.MemoryView":139 + /* "View.MemoryView":140 * * if not isinstance(format, bytes): * format = format.encode('ASCII') # <<<<<<<<<<<<<< * self._format = format # keep a reference to the byte string * self.format = self._format */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_format, __pyx_n_s_encode); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 139, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_format, __pyx_n_s_encode); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 140, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { @@ -8958,13 +9002,13 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ } __pyx_t_3 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_n_s_ASCII) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_n_s_ASCII); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 139, __pyx_L1_error) + if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 140, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF_SET(__pyx_v_format, __pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":138 + /* "View.MemoryView":139 * raise ValueError("itemsize <= 0 for cython.array") * * if not isinstance(format, bytes): # <<<<<<<<<<<<<< @@ -8973,14 +9017,14 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":140 + /* "View.MemoryView":141 * if not isinstance(format, bytes): * format = format.encode('ASCII') * self._format = format # keep a reference to the byte string # <<<<<<<<<<<<<< * self.format = self._format * */ - if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 140, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_v_format))||((__pyx_v_format) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_format)->tp_name), 0))) __PYX_ERR(1, 141, __pyx_L1_error) __pyx_t_3 = __pyx_v_format; __Pyx_INCREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_3); @@ -8989,7 +9033,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_v_self->_format = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":141 + /* "View.MemoryView":142 * format = format.encode('ASCII') * self._format = format # keep a reference to the byte string * self.format = self._format # <<<<<<<<<<<<<< @@ -8998,12 +9042,12 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ if (unlikely(__pyx_v_self->_format == Py_None)) { 
PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found"); - __PYX_ERR(1, 141, __pyx_L1_error) + __PYX_ERR(1, 142, __pyx_L1_error) } - __pyx_t_7 = __Pyx_PyBytes_AsWritableString(__pyx_v_self->_format); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) __PYX_ERR(1, 141, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyBytes_AsWritableString(__pyx_v_self->_format); if (unlikely((!__pyx_t_7) && PyErr_Occurred())) __PYX_ERR(1, 142, __pyx_L1_error) __pyx_v_self->format = __pyx_t_7; - /* "View.MemoryView":144 + /* "View.MemoryView":145 * * * self._shape = PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2) # <<<<<<<<<<<<<< @@ -9012,7 +9056,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->_shape = ((Py_ssize_t *)PyObject_Malloc((((sizeof(Py_ssize_t)) * __pyx_v_self->ndim) * 2))); - /* "View.MemoryView":145 + /* "View.MemoryView":146 * * self._shape = PyObject_Malloc(sizeof(Py_ssize_t)*self.ndim*2) * self._strides = self._shape + self.ndim # <<<<<<<<<<<<<< @@ -9021,7 +9065,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->_strides = (__pyx_v_self->_shape + __pyx_v_self->ndim); - /* "View.MemoryView":147 + /* "View.MemoryView":148 * self._strides = self._shape + self.ndim * * if not self._shape: # <<<<<<<<<<<<<< @@ -9031,20 +9075,20 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = ((!(__pyx_v_self->_shape != 0)) != 0); if (unlikely(__pyx_t_4)) { - /* "View.MemoryView":148 + /* "View.MemoryView":149 * * if not self._shape: * raise MemoryError("unable to allocate shape and strides.") # <<<<<<<<<<<<<< * * */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__18, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 148, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__18, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 149, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); 
__Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 148, __pyx_L1_error) + __PYX_ERR(1, 149, __pyx_L1_error) - /* "View.MemoryView":147 + /* "View.MemoryView":148 * self._strides = self._shape + self.ndim * * if not self._shape: # <<<<<<<<<<<<<< @@ -9053,7 +9097,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":151 + /* "View.MemoryView":152 * * * for idx, dim in enumerate(shape): # <<<<<<<<<<<<<< @@ -9065,18 +9109,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ for (;;) { if (__pyx_t_1 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_1); __Pyx_INCREF(__pyx_t_5); __pyx_t_1++; if (unlikely(0 < 0)) __PYX_ERR(1, 151, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_1); __Pyx_INCREF(__pyx_t_5); __pyx_t_1++; if (unlikely(0 < 0)) __PYX_ERR(1, 152, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 151, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 152, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif - __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 151, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_5); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 152, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_dim = __pyx_t_9; __pyx_v_idx = __pyx_t_8; __pyx_t_8 = (__pyx_t_8 + 1); - /* "View.MemoryView":152 + /* "View.MemoryView":153 * * for idx, dim in enumerate(shape): * if dim <= 0: # <<<<<<<<<<<<<< @@ -9086,18 +9130,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = ((__pyx_v_dim <= 0) != 0); 
if (unlikely(__pyx_t_4)) { - /* "View.MemoryView":153 + /* "View.MemoryView":154 * for idx, dim in enumerate(shape): * if dim <= 0: * raise ValueError("Invalid shape in axis %d: %d." % (idx, dim)) # <<<<<<<<<<<<<< * self._shape[idx] = dim * */ - __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_idx); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 153, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_idx); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 153, __pyx_L1_error) + __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 153, __pyx_L1_error) + __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_5); @@ -9105,17 +9149,17 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_t_6); __pyx_t_5 = 0; __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyString_Format(__pyx_kp_s_Invalid_shape_in_axis_d_d, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 153, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyString_Format(__pyx_kp_s_Invalid_shape_in_axis_d_d, __pyx_t_10); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_6); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 153, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_6); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_Raise(__pyx_t_10, 0, 0, 0); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __PYX_ERR(1, 153, 
__pyx_L1_error) + __PYX_ERR(1, 154, __pyx_L1_error) - /* "View.MemoryView":152 + /* "View.MemoryView":153 * * for idx, dim in enumerate(shape): * if dim <= 0: # <<<<<<<<<<<<<< @@ -9124,7 +9168,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":154 + /* "View.MemoryView":155 * if dim <= 0: * raise ValueError("Invalid shape in axis %d: %d." % (idx, dim)) * self._shape[idx] = dim # <<<<<<<<<<<<<< @@ -9133,7 +9177,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ (__pyx_v_self->_shape[__pyx_v_idx]) = __pyx_v_dim; - /* "View.MemoryView":151 + /* "View.MemoryView":152 * * * for idx, dim in enumerate(shape): # <<<<<<<<<<<<<< @@ -9143,17 +9187,17 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":157 + /* "View.MemoryView":158 * * cdef char order * if mode == 'fortran': # <<<<<<<<<<<<<< * order = b'F' * self.mode = u'fortran' */ - __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_fortran, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(1, 157, __pyx_L1_error) + __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_fortran, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(1, 158, __pyx_L1_error) if (__pyx_t_4) { - /* "View.MemoryView":158 + /* "View.MemoryView":159 * cdef char order * if mode == 'fortran': * order = b'F' # <<<<<<<<<<<<<< @@ -9162,7 +9206,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_order = 'F'; - /* "View.MemoryView":159 + /* "View.MemoryView":160 * if mode == 'fortran': * order = b'F' * self.mode = u'fortran' # <<<<<<<<<<<<<< @@ -9175,7 +9219,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __Pyx_DECREF(__pyx_v_self->mode); __pyx_v_self->mode = __pyx_n_u_fortran; - /* "View.MemoryView":157 + /* "View.MemoryView":158 * * cdef char order * if mode == 'fortran': 
# <<<<<<<<<<<<<< @@ -9185,17 +9229,17 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ goto __pyx_L10; } - /* "View.MemoryView":160 + /* "View.MemoryView":161 * order = b'F' * self.mode = u'fortran' * elif mode == 'c': # <<<<<<<<<<<<<< * order = b'C' * self.mode = u'c' */ - __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_c, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(1, 160, __pyx_L1_error) + __pyx_t_4 = (__Pyx_PyString_Equals(__pyx_v_mode, __pyx_n_s_c, Py_EQ)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(1, 161, __pyx_L1_error) if (likely(__pyx_t_4)) { - /* "View.MemoryView":161 + /* "View.MemoryView":162 * self.mode = u'fortran' * elif mode == 'c': * order = b'C' # <<<<<<<<<<<<<< @@ -9204,7 +9248,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_order = 'C'; - /* "View.MemoryView":162 + /* "View.MemoryView":163 * elif mode == 'c': * order = b'C' * self.mode = u'c' # <<<<<<<<<<<<<< @@ -9217,7 +9261,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __Pyx_DECREF(__pyx_v_self->mode); __pyx_v_self->mode = __pyx_n_u_c; - /* "View.MemoryView":160 + /* "View.MemoryView":161 * order = b'F' * self.mode = u'fortran' * elif mode == 'c': # <<<<<<<<<<<<<< @@ -9227,7 +9271,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ goto __pyx_L10; } - /* "View.MemoryView":164 + /* "View.MemoryView":165 * self.mode = u'c' * else: * raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode) # <<<<<<<<<<<<<< @@ -9235,18 +9279,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ * self.len = fill_contig_strides_array(self._shape, self._strides, */ /*else*/ { - __pyx_t_3 = __Pyx_PyString_FormatSafe(__pyx_kp_s_Invalid_mode_expected_c_or_fortr, __pyx_v_mode); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 164, __pyx_L1_error) + __pyx_t_3 = 
__Pyx_PyString_FormatSafe(__pyx_kp_s_Invalid_mode_expected_c_or_fortr, __pyx_v_mode); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 164, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 165, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_10, 0, 0, 0); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __PYX_ERR(1, 164, __pyx_L1_error) + __PYX_ERR(1, 165, __pyx_L1_error) } __pyx_L10:; - /* "View.MemoryView":166 + /* "View.MemoryView":167 * raise ValueError("Invalid mode, expected 'c' or 'fortran', got %s" % mode) * * self.len = fill_contig_strides_array(self._shape, self._strides, # <<<<<<<<<<<<<< @@ -9255,7 +9299,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->len = __pyx_fill_contig_strides_array(__pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_itemsize, __pyx_v_self->ndim, __pyx_v_order); - /* "View.MemoryView":169 + /* "View.MemoryView":170 * itemsize, self.ndim, order) * * self.free_data = allocate_buffer # <<<<<<<<<<<<<< @@ -9264,19 +9308,19 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->free_data = __pyx_v_allocate_buffer; - /* "View.MemoryView":170 + /* "View.MemoryView":171 * * self.free_data = allocate_buffer * self.dtype_is_object = format == b'O' # <<<<<<<<<<<<<< * if allocate_buffer: * */ - __pyx_t_10 = PyObject_RichCompare(__pyx_v_format, __pyx_n_b_O, Py_EQ); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 170, __pyx_L1_error) - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 170, __pyx_L1_error) + __pyx_t_10 = PyObject_RichCompare(__pyx_v_format, __pyx_n_b_O, 
Py_EQ); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 171, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 171, __pyx_L1_error) __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_self->dtype_is_object = __pyx_t_4; - /* "View.MemoryView":171 + /* "View.MemoryView":172 * self.free_data = allocate_buffer * self.dtype_is_object = format == b'O' * if allocate_buffer: # <<<<<<<<<<<<<< @@ -9286,7 +9330,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = (__pyx_v_allocate_buffer != 0); if (__pyx_t_4) { - /* "View.MemoryView":174 + /* "View.MemoryView":175 * * * self.data = malloc(self.len) # <<<<<<<<<<<<<< @@ -9295,7 +9339,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_self->data = ((char *)malloc(__pyx_v_self->len)); - /* "View.MemoryView":175 + /* "View.MemoryView":176 * * self.data = malloc(self.len) * if not self.data: # <<<<<<<<<<<<<< @@ -9305,20 +9349,20 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = ((!(__pyx_v_self->data != 0)) != 0); if (unlikely(__pyx_t_4)) { - /* "View.MemoryView":176 + /* "View.MemoryView":177 * self.data = malloc(self.len) * if not self.data: * raise MemoryError("unable to allocate array data.") # <<<<<<<<<<<<<< * * if self.dtype_is_object: */ - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__19, NULL); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 176, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_builtin_MemoryError, __pyx_tuple__19, NULL); if (unlikely(!__pyx_t_10)) __PYX_ERR(1, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_Raise(__pyx_t_10, 0, 0, 0); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __PYX_ERR(1, 176, __pyx_L1_error) + __PYX_ERR(1, 177, __pyx_L1_error) - /* "View.MemoryView":175 + /* "View.MemoryView":176 * * self.data = malloc(self.len) * if 
not self.data: # <<<<<<<<<<<<<< @@ -9327,7 +9371,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":178 + /* "View.MemoryView":179 * raise MemoryError("unable to allocate array data.") * * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -9337,7 +9381,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ __pyx_t_4 = (__pyx_v_self->dtype_is_object != 0); if (__pyx_t_4) { - /* "View.MemoryView":179 + /* "View.MemoryView":180 * * if self.dtype_is_object: * p = self.data # <<<<<<<<<<<<<< @@ -9346,7 +9390,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ __pyx_v_p = ((PyObject **)__pyx_v_self->data); - /* "View.MemoryView":180 + /* "View.MemoryView":181 * if self.dtype_is_object: * p = self.data * for i in range(self.len / itemsize): # <<<<<<<<<<<<<< @@ -9355,18 +9399,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ if (unlikely(__pyx_v_itemsize == 0)) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - __PYX_ERR(1, 180, __pyx_L1_error) + __PYX_ERR(1, 181, __pyx_L1_error) } else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_itemsize == (Py_ssize_t)-1) && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_v_self->len))) { PyErr_SetString(PyExc_OverflowError, "value too large to perform division"); - __PYX_ERR(1, 180, __pyx_L1_error) + __PYX_ERR(1, 181, __pyx_L1_error) } __pyx_t_1 = __Pyx_div_Py_ssize_t(__pyx_v_self->len, __pyx_v_itemsize); __pyx_t_9 = __pyx_t_1; for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_9; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "View.MemoryView":181 + /* "View.MemoryView":182 * p = self.data * for i in range(self.len / itemsize): * p[i] = Py_None # <<<<<<<<<<<<<< @@ -9375,7 +9419,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ (__pyx_v_p[__pyx_v_i]) = Py_None; - /* 
"View.MemoryView":182 + /* "View.MemoryView":183 * for i in range(self.len / itemsize): * p[i] = Py_None * Py_INCREF(Py_None) # <<<<<<<<<<<<<< @@ -9385,7 +9429,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ Py_INCREF(Py_None); } - /* "View.MemoryView":178 + /* "View.MemoryView":179 * raise MemoryError("unable to allocate array data.") * * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -9394,7 +9438,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":171 + /* "View.MemoryView":172 * self.free_data = allocate_buffer * self.dtype_is_object = format == b'O' * if allocate_buffer: # <<<<<<<<<<<<<< @@ -9403,7 +9447,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ */ } - /* "View.MemoryView":122 + /* "View.MemoryView":123 * cdef bint dtype_is_object * * def __cinit__(array self, tuple shape, Py_ssize_t itemsize, format not None, # <<<<<<<<<<<<<< @@ -9427,7 +9471,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __ return __pyx_r; } -/* "View.MemoryView":185 +/* "View.MemoryView":186 * * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): # <<<<<<<<<<<<<< @@ -9470,7 +9514,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); __Pyx_GIVEREF(__pyx_v_info->obj); - /* "View.MemoryView":186 + /* "View.MemoryView":187 * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): * cdef int bufmode = -1 # <<<<<<<<<<<<<< @@ -9479,18 +9523,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ __pyx_v_bufmode = -1; - /* "View.MemoryView":187 + /* "View.MemoryView":188 * def __getbuffer__(self, Py_buffer *info, int flags): * cdef int bufmode = -1 * if self.mode == u"c": # <<<<<<<<<<<<<< * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * elif self.mode == 
u"fortran": */ - __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_c, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 187, __pyx_L1_error) + __pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_c, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 188, __pyx_L1_error) __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":188 + /* "View.MemoryView":189 * cdef int bufmode = -1 * if self.mode == u"c": * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS # <<<<<<<<<<<<<< @@ -9499,7 +9543,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ __pyx_v_bufmode = (PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS); - /* "View.MemoryView":187 + /* "View.MemoryView":188 * def __getbuffer__(self, Py_buffer *info, int flags): * cdef int bufmode = -1 * if self.mode == u"c": # <<<<<<<<<<<<<< @@ -9509,18 +9553,18 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru goto __pyx_L3; } - /* "View.MemoryView":189 + /* "View.MemoryView":190 * if self.mode == u"c": * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * elif self.mode == u"fortran": # <<<<<<<<<<<<<< * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * if not (flags & bufmode): */ - __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_fortran, Py_EQ)); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(1, 189, __pyx_L1_error) + __pyx_t_2 = (__Pyx_PyUnicode_Equals(__pyx_v_self->mode, __pyx_n_u_fortran, Py_EQ)); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(1, 190, __pyx_L1_error) __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "View.MemoryView":190 + /* "View.MemoryView":191 * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * elif self.mode == u"fortran": * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS # <<<<<<<<<<<<<< @@ -9529,7 +9573,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ __pyx_v_bufmode = (PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS); - /* 
"View.MemoryView":189 + /* "View.MemoryView":190 * if self.mode == u"c": * bufmode = PyBUF_C_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * elif self.mode == u"fortran": # <<<<<<<<<<<<<< @@ -9539,7 +9583,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru } __pyx_L3:; - /* "View.MemoryView":191 + /* "View.MemoryView":192 * elif self.mode == u"fortran": * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * if not (flags & bufmode): # <<<<<<<<<<<<<< @@ -9549,20 +9593,20 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_1 = ((!((__pyx_v_flags & __pyx_v_bufmode) != 0)) != 0); if (unlikely(__pyx_t_1)) { - /* "View.MemoryView":192 + /* "View.MemoryView":193 * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * if not (flags & bufmode): * raise ValueError("Can only create a buffer that is contiguous in memory.") # <<<<<<<<<<<<<< * info.buf = self.data * info.len = self.len */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__20, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 192, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__20, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 193, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 192, __pyx_L1_error) + __PYX_ERR(1, 193, __pyx_L1_error) - /* "View.MemoryView":191 + /* "View.MemoryView":192 * elif self.mode == u"fortran": * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * if not (flags & bufmode): # <<<<<<<<<<<<<< @@ -9571,7 +9615,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ } - /* "View.MemoryView":193 + /* "View.MemoryView":194 * if not (flags & bufmode): * raise ValueError("Can only create a buffer that is contiguous in memory.") * info.buf = self.data # <<<<<<<<<<<<<< @@ -9581,7 +9625,7 @@ static int 
__pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_4 = __pyx_v_self->data; __pyx_v_info->buf = __pyx_t_4; - /* "View.MemoryView":194 + /* "View.MemoryView":195 * raise ValueError("Can only create a buffer that is contiguous in memory.") * info.buf = self.data * info.len = self.len # <<<<<<<<<<<<<< @@ -9591,7 +9635,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_5 = __pyx_v_self->len; __pyx_v_info->len = __pyx_t_5; - /* "View.MemoryView":195 + /* "View.MemoryView":196 * info.buf = self.data * info.len = self.len * info.ndim = self.ndim # <<<<<<<<<<<<<< @@ -9601,7 +9645,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_6 = __pyx_v_self->ndim; __pyx_v_info->ndim = __pyx_t_6; - /* "View.MemoryView":196 + /* "View.MemoryView":197 * info.len = self.len * info.ndim = self.ndim * info.shape = self._shape # <<<<<<<<<<<<<< @@ -9611,7 +9655,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_7 = __pyx_v_self->_shape; __pyx_v_info->shape = __pyx_t_7; - /* "View.MemoryView":197 + /* "View.MemoryView":198 * info.ndim = self.ndim * info.shape = self._shape * info.strides = self._strides # <<<<<<<<<<<<<< @@ -9621,7 +9665,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_7 = __pyx_v_self->_strides; __pyx_v_info->strides = __pyx_t_7; - /* "View.MemoryView":198 + /* "View.MemoryView":199 * info.shape = self._shape * info.strides = self._strides * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -9630,7 +9674,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ __pyx_v_info->suboffsets = NULL; - /* "View.MemoryView":199 + /* "View.MemoryView":200 * info.strides = self._strides * info.suboffsets = NULL * info.itemsize = self.itemsize # <<<<<<<<<<<<<< @@ -9640,7 +9684,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru 
__pyx_t_5 = __pyx_v_self->itemsize; __pyx_v_info->itemsize = __pyx_t_5; - /* "View.MemoryView":200 + /* "View.MemoryView":201 * info.suboffsets = NULL * info.itemsize = self.itemsize * info.readonly = 0 # <<<<<<<<<<<<<< @@ -9649,7 +9693,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru */ __pyx_v_info->readonly = 0; - /* "View.MemoryView":202 + /* "View.MemoryView":203 * info.readonly = 0 * * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< @@ -9659,7 +9703,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); if (__pyx_t_1) { - /* "View.MemoryView":203 + /* "View.MemoryView":204 * * if flags & PyBUF_FORMAT: * info.format = self.format # <<<<<<<<<<<<<< @@ -9669,7 +9713,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __pyx_t_4 = __pyx_v_self->format; __pyx_v_info->format = __pyx_t_4; - /* "View.MemoryView":202 + /* "View.MemoryView":203 * info.readonly = 0 * * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< @@ -9679,7 +9723,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru goto __pyx_L5; } - /* "View.MemoryView":205 + /* "View.MemoryView":206 * info.format = self.format * else: * info.format = NULL # <<<<<<<<<<<<<< @@ -9691,7 +9735,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru } __pyx_L5:; - /* "View.MemoryView":207 + /* "View.MemoryView":208 * info.format = NULL * * info.obj = self # <<<<<<<<<<<<<< @@ -9704,7 +9748,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = ((PyObject *)__pyx_v_self); - /* "View.MemoryView":185 + /* "View.MemoryView":186 * * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): # <<<<<<<<<<<<<< @@ -9734,7 +9778,7 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(stru return __pyx_r; } -/* 
"View.MemoryView":211 +/* "View.MemoryView":212 * __pyx_getbuffer = capsule( &__pyx_array_getbuffer, "getbuffer(obj, view, flags)") * * def __dealloc__(array self): # <<<<<<<<<<<<<< @@ -9758,7 +9802,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc int __pyx_t_1; __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "View.MemoryView":212 + /* "View.MemoryView":213 * * def __dealloc__(array self): * if self.callback_free_data != NULL: # <<<<<<<<<<<<<< @@ -9768,7 +9812,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc __pyx_t_1 = ((__pyx_v_self->callback_free_data != NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":213 + /* "View.MemoryView":214 * def __dealloc__(array self): * if self.callback_free_data != NULL: * self.callback_free_data(self.data) # <<<<<<<<<<<<<< @@ -9777,7 +9821,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc */ __pyx_v_self->callback_free_data(__pyx_v_self->data); - /* "View.MemoryView":212 + /* "View.MemoryView":213 * * def __dealloc__(array self): * if self.callback_free_data != NULL: # <<<<<<<<<<<<<< @@ -9787,7 +9831,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc goto __pyx_L3; } - /* "View.MemoryView":214 + /* "View.MemoryView":215 * if self.callback_free_data != NULL: * self.callback_free_data(self.data) * elif self.free_data: # <<<<<<<<<<<<<< @@ -9797,7 +9841,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc __pyx_t_1 = (__pyx_v_self->free_data != 0); if (__pyx_t_1) { - /* "View.MemoryView":215 + /* "View.MemoryView":216 * self.callback_free_data(self.data) * elif self.free_data: * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -9807,7 +9851,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc __pyx_t_1 = (__pyx_v_self->dtype_is_object != 0); if (__pyx_t_1) { - /* "View.MemoryView":216 + /* "View.MemoryView":217 * elif 
self.free_data: * if self.dtype_is_object: * refcount_objects_in_slice(self.data, self._shape, # <<<<<<<<<<<<<< @@ -9816,7 +9860,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc */ __pyx_memoryview_refcount_objects_in_slice(__pyx_v_self->data, __pyx_v_self->_shape, __pyx_v_self->_strides, __pyx_v_self->ndim, 0); - /* "View.MemoryView":215 + /* "View.MemoryView":216 * self.callback_free_data(self.data) * elif self.free_data: * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -9825,7 +9869,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc */ } - /* "View.MemoryView":218 + /* "View.MemoryView":219 * refcount_objects_in_slice(self.data, self._shape, * self._strides, self.ndim, False) * free(self.data) # <<<<<<<<<<<<<< @@ -9834,7 +9878,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc */ free(__pyx_v_self->data); - /* "View.MemoryView":214 + /* "View.MemoryView":215 * if self.callback_free_data != NULL: * self.callback_free_data(self.data) * elif self.free_data: # <<<<<<<<<<<<<< @@ -9844,7 +9888,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc } __pyx_L3:; - /* "View.MemoryView":219 + /* "View.MemoryView":220 * self._strides, self.ndim, False) * free(self.data) * PyObject_Free(self._shape) # <<<<<<<<<<<<<< @@ -9853,7 +9897,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc */ PyObject_Free(__pyx_v_self->_shape); - /* "View.MemoryView":211 + /* "View.MemoryView":212 * __pyx_getbuffer = capsule( &__pyx_array_getbuffer, "getbuffer(obj, view, flags)") * * def __dealloc__(array self): # <<<<<<<<<<<<<< @@ -9865,7 +9909,7 @@ static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struc __Pyx_RefNannyFinishContext(); } -/* "View.MemoryView":222 +/* "View.MemoryView":223 * * @property * def memview(self): # <<<<<<<<<<<<<< @@ -9895,7 +9939,7 @@ static PyObject 
*__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct _ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":223 + /* "View.MemoryView":224 * @property * def memview(self): * return self.get_memview() # <<<<<<<<<<<<<< @@ -9903,13 +9947,13 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct _ * @cname('get_memview') */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = ((struct __pyx_vtabstruct_array *)__pyx_v_self->__pyx_vtab)->get_memview(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 223, __pyx_L1_error) + __pyx_t_1 = ((struct __pyx_vtabstruct_array *)__pyx_v_self->__pyx_vtab)->get_memview(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 224, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "View.MemoryView":222 + /* "View.MemoryView":223 * * @property * def memview(self): # <<<<<<<<<<<<<< @@ -9928,7 +9972,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct _ return __pyx_r; } -/* "View.MemoryView":226 +/* "View.MemoryView":227 * * @cname('get_memview') * cdef get_memview(self): # <<<<<<<<<<<<<< @@ -9948,7 +9992,7 @@ static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_memview", 0); - /* "View.MemoryView":227 + /* "View.MemoryView":228 * @cname('get_memview') * cdef get_memview(self): * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE # <<<<<<<<<<<<<< @@ -9957,7 +10001,7 @@ static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { */ __pyx_v_flags = ((PyBUF_ANY_CONTIGUOUS | PyBUF_FORMAT) | PyBUF_WRITABLE); - /* "View.MemoryView":228 + /* "View.MemoryView":229 * cdef get_memview(self): * flags = PyBUF_ANY_CONTIGUOUS|PyBUF_FORMAT|PyBUF_WRITABLE * return memoryview(self, flags, self.dtype_is_object) # <<<<<<<<<<<<<< @@ -9965,11 +10009,11 @@ static PyObject *__pyx_array_get_memview(struct __pyx_array_obj 
*__pyx_v_self) { * def __len__(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 228, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 228, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 228, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(((PyObject *)__pyx_v_self)); __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); @@ -9980,14 +10024,14 @@ static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2); __pyx_t_1 = 0; __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 228, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":226 + /* "View.MemoryView":227 * * @cname('get_memview') * cdef get_memview(self): # <<<<<<<<<<<<<< @@ -10008,7 +10052,7 @@ static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self) { return __pyx_r; } -/* "View.MemoryView":230 +/* "View.MemoryView":231 * return memoryview(self, flags, self.dtype_is_object) * * def __len__(self): # <<<<<<<<<<<<<< @@ -10034,7 +10078,7 @@ static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(str 
__Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__len__", 0); - /* "View.MemoryView":231 + /* "View.MemoryView":232 * * def __len__(self): * return self._shape[0] # <<<<<<<<<<<<<< @@ -10044,7 +10088,7 @@ static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(str __pyx_r = (__pyx_v_self->_shape[0]); goto __pyx_L0; - /* "View.MemoryView":230 + /* "View.MemoryView":231 * return memoryview(self, flags, self.dtype_is_object) * * def __len__(self): # <<<<<<<<<<<<<< @@ -10058,7 +10102,7 @@ static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(str return __pyx_r; } -/* "View.MemoryView":233 +/* "View.MemoryView":234 * return self._shape[0] * * def __getattr__(self, attr): # <<<<<<<<<<<<<< @@ -10089,7 +10133,7 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__( int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getattr__", 0); - /* "View.MemoryView":234 + /* "View.MemoryView":235 * * def __getattr__(self, attr): * return getattr(self.memview, attr) # <<<<<<<<<<<<<< @@ -10097,16 +10141,16 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__( * def __getitem__(self, item): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 234, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 235, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_GetAttr(__pyx_t_1, __pyx_v_attr); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 234, __pyx_L1_error) + __pyx_t_2 = __Pyx_GetAttr(__pyx_t_1, __pyx_v_attr); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 235, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":233 + /* "View.MemoryView":234 * return self._shape[0] * * def __getattr__(self, attr): # 
<<<<<<<<<<<<<< @@ -10126,7 +10170,7 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__( return __pyx_r; } -/* "View.MemoryView":236 +/* "View.MemoryView":237 * return getattr(self.memview, attr) * * def __getitem__(self, item): # <<<<<<<<<<<<<< @@ -10157,7 +10201,7 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "View.MemoryView":237 + /* "View.MemoryView":238 * * def __getitem__(self, item): * return self.memview[item] # <<<<<<<<<<<<<< @@ -10165,16 +10209,16 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__ * def __setitem__(self, item, value): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 237, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 238, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_item); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 237, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_item); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 238, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":236 + /* "View.MemoryView":237 * return getattr(self.memview, attr) * * def __getitem__(self, item): # <<<<<<<<<<<<<< @@ -10194,7 +10238,7 @@ static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__ return __pyx_r; } -/* "View.MemoryView":239 +/* "View.MemoryView":240 * return self.memview[item] * * def __setitem__(self, item, value): # <<<<<<<<<<<<<< @@ -10224,19 +10268,19 @@ static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struc int __pyx_clineno = 0; 
__Pyx_RefNannySetupContext("__setitem__", 0); - /* "View.MemoryView":240 + /* "View.MemoryView":241 * * def __setitem__(self, item, value): * self.memview[item] = value # <<<<<<<<<<<<<< * * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 240, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_memview); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 241, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (unlikely(PyObject_SetItem(__pyx_t_1, __pyx_v_item, __pyx_v_value) < 0)) __PYX_ERR(1, 240, __pyx_L1_error) + if (unlikely(PyObject_SetItem(__pyx_t_1, __pyx_v_item, __pyx_v_value) < 0)) __PYX_ERR(1, 241, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "View.MemoryView":239 + /* "View.MemoryView":240 * return self.memview[item] * * def __setitem__(self, item, value): # <<<<<<<<<<<<<< @@ -10369,7 +10413,7 @@ static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct return __pyx_r; } -/* "View.MemoryView":244 +/* "View.MemoryView":245 * * @cname("__pyx_array_new") * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, # <<<<<<<<<<<<<< @@ -10391,7 +10435,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize int __pyx_clineno = 0; __Pyx_RefNannySetupContext("array_cwrapper", 0); - /* "View.MemoryView":248 + /* "View.MemoryView":249 * cdef array result * * if buf == NULL: # <<<<<<<<<<<<<< @@ -10401,20 +10445,20 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize __pyx_t_1 = ((__pyx_v_buf == NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":249 + /* "View.MemoryView":250 * * if buf == NULL: * result = array(shape, itemsize, format, mode.decode('ASCII')) # <<<<<<<<<<<<<< * else: * result = array(shape, itemsize, format, mode.decode('ASCII'), */ - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 249, 
__pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 249, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 249, __pyx_L1_error) + __pyx_t_4 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 249, __pyx_L1_error) + __pyx_t_5 = PyTuple_New(4); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_INCREF(__pyx_v_shape); __Pyx_GIVEREF(__pyx_v_shape); @@ -10428,13 +10472,13 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize __pyx_t_2 = 0; __pyx_t_3 = 0; __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 249, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_5, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 250, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":248 + /* "View.MemoryView":249 * cdef array result * * if buf == NULL: # <<<<<<<<<<<<<< @@ -10444,7 +10488,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize goto __pyx_L3; } - /* "View.MemoryView":251 + /* "View.MemoryView":252 * result = array(shape, itemsize, format, mode.decode('ASCII')) * else: * result = array(shape, 
itemsize, format, mode.decode('ASCII'), # <<<<<<<<<<<<<< @@ -10452,13 +10496,13 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize * result.data = buf */ /*else*/ { - __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 251, __pyx_L1_error) + __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_itemsize); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 251, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyBytes_FromString(__pyx_v_format); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 251, __pyx_L1_error) + __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_mode, 0, strlen(__pyx_v_mode), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_2 = PyTuple_New(4); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 251, __pyx_L1_error) + __pyx_t_2 = PyTuple_New(4); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_v_shape); __Pyx_GIVEREF(__pyx_v_shape); @@ -10473,32 +10517,32 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize __pyx_t_5 = 0; __pyx_t_3 = 0; - /* "View.MemoryView":252 + /* "View.MemoryView":253 * else: * result = array(shape, itemsize, format, mode.decode('ASCII'), * allocate_buffer=False) # <<<<<<<<<<<<<< * result.data = buf * */ - __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 252, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_allocate_buffer, Py_False) < 0) __PYX_ERR(1, 252, 
__pyx_L1_error) + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_allocate_buffer, Py_False) < 0) __PYX_ERR(1, 253, __pyx_L1_error) - /* "View.MemoryView":251 + /* "View.MemoryView":252 * result = array(shape, itemsize, format, mode.decode('ASCII')) * else: * result = array(shape, itemsize, format, mode.decode('ASCII'), # <<<<<<<<<<<<<< * allocate_buffer=False) * result.data = buf */ - __pyx_t_5 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 251, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(((PyObject *)__pyx_array_type), __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 252, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_result = ((struct __pyx_array_obj *)__pyx_t_5); __pyx_t_5 = 0; - /* "View.MemoryView":253 + /* "View.MemoryView":254 * result = array(shape, itemsize, format, mode.decode('ASCII'), * allocate_buffer=False) * result.data = buf # <<<<<<<<<<<<<< @@ -10509,7 +10553,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize } __pyx_L3:; - /* "View.MemoryView":255 + /* "View.MemoryView":256 * result.data = buf * * return result # <<<<<<<<<<<<<< @@ -10521,7 +10565,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize __pyx_r = __pyx_v_result; goto __pyx_L0; - /* "View.MemoryView":244 + /* "View.MemoryView":245 * * @cname("__pyx_array_new") * cdef array array_cwrapper(tuple shape, Py_ssize_t itemsize, char *format, # <<<<<<<<<<<<<< @@ -10544,7 +10588,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize return __pyx_r; } -/* "View.MemoryView":281 +/* "View.MemoryView":282 * cdef class Enum(object): * cdef object name * def __init__(self, name): # <<<<<<<<<<<<<< @@ -10581,7 +10625,7 @@ static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_ar else goto __pyx_L5_argtuple_error; } if 
(unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) __PYX_ERR(1, 281, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__init__") < 0)) __PYX_ERR(1, 282, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { goto __pyx_L5_argtuple_error; @@ -10592,7 +10636,7 @@ static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_ar } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 281, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 282, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("View.MemoryView.Enum.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -10610,7 +10654,7 @@ static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struc __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__init__", 0); - /* "View.MemoryView":282 + /* "View.MemoryView":283 * cdef object name * def __init__(self, name): * self.name = name # <<<<<<<<<<<<<< @@ -10623,7 +10667,7 @@ static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struc __Pyx_DECREF(__pyx_v_self->name); __pyx_v_self->name = __pyx_v_name; - /* "View.MemoryView":281 + /* "View.MemoryView":282 * cdef class Enum(object): * cdef object name * def __init__(self, name): # <<<<<<<<<<<<<< @@ -10637,7 +10681,7 @@ static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struc return __pyx_r; } -/* "View.MemoryView":283 +/* "View.MemoryView":284 * def __init__(self, name): * self.name = name * def __repr__(self): # <<<<<<<<<<<<<< @@ -10663,7 +10707,7 @@ static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr_ __Pyx_RefNannyDeclarations 
__Pyx_RefNannySetupContext("__repr__", 0); - /* "View.MemoryView":284 + /* "View.MemoryView":285 * self.name = name * def __repr__(self): * return self.name # <<<<<<<<<<<<<< @@ -10675,7 +10719,7 @@ static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr_ __pyx_r = __pyx_v_self->name; goto __pyx_L0; - /* "View.MemoryView":283 + /* "View.MemoryView":284 * def __init__(self, name): * self.name = name * def __repr__(self): # <<<<<<<<<<<<<< @@ -10983,7 +11027,7 @@ static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_Me return __pyx_r; } -/* "View.MemoryView":298 +/* "View.MemoryView":299 * * @cname('__pyx_align_pointer') * cdef void *align_pointer(void *memory, size_t alignment) nogil: # <<<<<<<<<<<<<< @@ -10997,7 +11041,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) void *__pyx_r; int __pyx_t_1; - /* "View.MemoryView":300 + /* "View.MemoryView":301 * cdef void *align_pointer(void *memory, size_t alignment) nogil: * "Align pointer memory on a given boundary" * cdef Py_intptr_t aligned_p = memory # <<<<<<<<<<<<<< @@ -11006,7 +11050,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) */ __pyx_v_aligned_p = ((Py_intptr_t)__pyx_v_memory); - /* "View.MemoryView":304 + /* "View.MemoryView":305 * * with cython.cdivision(True): * offset = aligned_p % alignment # <<<<<<<<<<<<<< @@ -11015,7 +11059,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) */ __pyx_v_offset = (__pyx_v_aligned_p % __pyx_v_alignment); - /* "View.MemoryView":306 + /* "View.MemoryView":307 * offset = aligned_p % alignment * * if offset > 0: # <<<<<<<<<<<<<< @@ -11025,7 +11069,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) __pyx_t_1 = ((__pyx_v_offset > 0) != 0); if (__pyx_t_1) { - /* "View.MemoryView":307 + /* "View.MemoryView":308 * * if offset > 0: * aligned_p += alignment - offset # <<<<<<<<<<<<<< @@ -11034,7 
+11078,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) */ __pyx_v_aligned_p = (__pyx_v_aligned_p + (__pyx_v_alignment - __pyx_v_offset)); - /* "View.MemoryView":306 + /* "View.MemoryView":307 * offset = aligned_p % alignment * * if offset > 0: # <<<<<<<<<<<<<< @@ -11043,7 +11087,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) */ } - /* "View.MemoryView":309 + /* "View.MemoryView":310 * aligned_p += alignment - offset * * return aligned_p # <<<<<<<<<<<<<< @@ -11053,7 +11097,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) __pyx_r = ((void *)__pyx_v_aligned_p); goto __pyx_L0; - /* "View.MemoryView":298 + /* "View.MemoryView":299 * * @cname('__pyx_align_pointer') * cdef void *align_pointer(void *memory, size_t alignment) nogil: # <<<<<<<<<<<<<< @@ -11066,7 +11110,7 @@ static void *__pyx_align_pointer(void *__pyx_v_memory, size_t __pyx_v_alignment) return __pyx_r; } -/* "View.MemoryView":345 +/* "View.MemoryView":346 * cdef __Pyx_TypeInfo *typeinfo * * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): # <<<<<<<<<<<<<< @@ -11111,7 +11155,7 @@ static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_ar case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_flags)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, 1); __PYX_ERR(1, 345, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, 1); __PYX_ERR(1, 346, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: @@ -11121,7 +11165,7 @@ static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_ar } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) __PYX_ERR(1, 345, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) 
__PYX_ERR(1, 346, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -11134,16 +11178,16 @@ static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_ar } } __pyx_v_obj = values[0]; - __pyx_v_flags = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 345, __pyx_L3_error) + __pyx_v_flags = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_flags == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 346, __pyx_L3_error) if (values[2]) { - __pyx_v_dtype_is_object = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_dtype_is_object == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 345, __pyx_L3_error) + __pyx_v_dtype_is_object = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_dtype_is_object == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 346, __pyx_L3_error) } else { __pyx_v_dtype_is_object = ((int)0); } } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 345, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(1, 346, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("View.MemoryView.memoryview.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -11168,7 +11212,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__cinit__", 0); - /* "View.MemoryView":346 + /* "View.MemoryView":347 * * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): * self.obj = obj # <<<<<<<<<<<<<< @@ -11181,7 +11225,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ __Pyx_DECREF(__pyx_v_self->obj); __pyx_v_self->obj = __pyx_v_obj; - /* "View.MemoryView":347 + /* "View.MemoryView":348 * def __cinit__(memoryview self, object obj, int flags, bint 
dtype_is_object=False): * self.obj = obj * self.flags = flags # <<<<<<<<<<<<<< @@ -11190,7 +11234,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ */ __pyx_v_self->flags = __pyx_v_flags; - /* "View.MemoryView":348 + /* "View.MemoryView":349 * self.obj = obj * self.flags = flags * if type(self) is memoryview or obj is not None: # <<<<<<<<<<<<<< @@ -11210,16 +11254,16 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ __pyx_L4_bool_binop_done:; if (__pyx_t_1) { - /* "View.MemoryView":349 + /* "View.MemoryView":350 * self.flags = flags * if type(self) is memoryview or obj is not None: * __Pyx_GetBuffer(obj, &self.view, flags) # <<<<<<<<<<<<<< * if self.view.obj == NULL: * (<__pyx_buffer *> &self.view).obj = Py_None */ - __pyx_t_4 = __Pyx_GetBuffer(__pyx_v_obj, (&__pyx_v_self->view), __pyx_v_flags); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 349, __pyx_L1_error) + __pyx_t_4 = __Pyx_GetBuffer(__pyx_v_obj, (&__pyx_v_self->view), __pyx_v_flags); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 350, __pyx_L1_error) - /* "View.MemoryView":350 + /* "View.MemoryView":351 * if type(self) is memoryview or obj is not None: * __Pyx_GetBuffer(obj, &self.view, flags) * if self.view.obj == NULL: # <<<<<<<<<<<<<< @@ -11229,7 +11273,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ __pyx_t_1 = ((((PyObject *)__pyx_v_self->view.obj) == NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":351 + /* "View.MemoryView":352 * __Pyx_GetBuffer(obj, &self.view, flags) * if self.view.obj == NULL: * (<__pyx_buffer *> &self.view).obj = Py_None # <<<<<<<<<<<<<< @@ -11238,16 +11282,16 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ */ ((Py_buffer *)(&__pyx_v_self->view))->obj = Py_None; - /* "View.MemoryView":352 + /* "View.MemoryView":353 * if self.view.obj == NULL: * (<__pyx_buffer *> &self.view).obj = Py_None * Py_INCREF(Py_None) # <<<<<<<<<<<<<< 
* - * global __pyx_memoryview_thread_locks_used + * if not __PYX_CYTHON_ATOMICS_ENABLED(): */ Py_INCREF(Py_None); - /* "View.MemoryView":350 + /* "View.MemoryView":351 * if type(self) is memoryview or obj is not None: * __Pyx_GetBuffer(obj, &self.view, flags) * if self.view.obj == NULL: # <<<<<<<<<<<<<< @@ -11256,7 +11300,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ */ } - /* "View.MemoryView":348 + /* "View.MemoryView":349 * self.obj = obj * self.flags = flags * if type(self) is memoryview or obj is not None: # <<<<<<<<<<<<<< @@ -11266,100 +11310,119 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ } /* "View.MemoryView":355 + * Py_INCREF(Py_None) * - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: # <<<<<<<<<<<<<< - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 + * if not __PYX_CYTHON_ATOMICS_ENABLED(): # <<<<<<<<<<<<<< + * global __pyx_memoryview_thread_locks_used + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: */ - __pyx_t_1 = ((__pyx_memoryview_thread_locks_used < 8) != 0); + __pyx_t_1 = ((!(__PYX_CYTHON_ATOMICS_ENABLED() != 0)) != 0); if (__pyx_t_1) { - /* "View.MemoryView":356 - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] # <<<<<<<<<<<<<< - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: - */ - __pyx_v_self->lock = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); - /* "View.MemoryView":357 - * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 # <<<<<<<<<<<<<< - * if self.lock is NULL: - * 
self.lock = PyThread_allocate_lock() + * if not __PYX_CYTHON_ATOMICS_ENABLED(): + * global __pyx_memoryview_thread_locks_used + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: # <<<<<<<<<<<<<< + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] + * __pyx_memoryview_thread_locks_used += 1 */ - __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used + 1); + __pyx_t_1 = ((__pyx_memoryview_thread_locks_used < 8) != 0); + if (__pyx_t_1) { - /* "View.MemoryView":355 - * - * global __pyx_memoryview_thread_locks_used - * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: # <<<<<<<<<<<<<< - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 + /* "View.MemoryView":358 + * global __pyx_memoryview_thread_locks_used + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] # <<<<<<<<<<<<<< + * __pyx_memoryview_thread_locks_used += 1 + * if self.lock is NULL: */ - } + __pyx_v_self->lock = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); - /* "View.MemoryView":358 - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: # <<<<<<<<<<<<<< - * self.lock = PyThread_allocate_lock() + /* "View.MemoryView":359 + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] + * __pyx_memoryview_thread_locks_used += 1 # <<<<<<<<<<<<<< * if self.lock is NULL: + * self.lock = PyThread_allocate_lock() */ - __pyx_t_1 = ((__pyx_v_self->lock == NULL) != 0); - if (__pyx_t_1) { + __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used + 1); - /* "View.MemoryView":359 - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is 
NULL: - * self.lock = PyThread_allocate_lock() # <<<<<<<<<<<<<< - * if self.lock is NULL: - * raise MemoryError + /* "View.MemoryView":357 + * if not __PYX_CYTHON_ATOMICS_ENABLED(): + * global __pyx_memoryview_thread_locks_used + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: # <<<<<<<<<<<<<< + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] + * __pyx_memoryview_thread_locks_used += 1 */ - __pyx_v_self->lock = PyThread_allocate_lock(); + } /* "View.MemoryView":360 - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] + * __pyx_memoryview_thread_locks_used += 1 * if self.lock is NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * + * self.lock = PyThread_allocate_lock() + * if self.lock is NULL: */ __pyx_t_1 = ((__pyx_v_self->lock == NULL) != 0); - if (unlikely(__pyx_t_1)) { + if (__pyx_t_1) { /* "View.MemoryView":361 - * self.lock = PyThread_allocate_lock() + * __pyx_memoryview_thread_locks_used += 1 * if self.lock is NULL: - * raise MemoryError # <<<<<<<<<<<<<< + * self.lock = PyThread_allocate_lock() # <<<<<<<<<<<<<< + * if self.lock is NULL: + * raise MemoryError + */ + __pyx_v_self->lock = PyThread_allocate_lock(); + + /* "View.MemoryView":362 + * if self.lock is NULL: + * self.lock = PyThread_allocate_lock() + * if self.lock is NULL: # <<<<<<<<<<<<<< + * raise MemoryError + * + */ + __pyx_t_1 = ((__pyx_v_self->lock == NULL) != 0); + if (unlikely(__pyx_t_1)) { + + /* "View.MemoryView":363 + * self.lock = PyThread_allocate_lock() + * if self.lock is NULL: + * raise MemoryError # <<<<<<<<<<<<<< * * if flags & PyBUF_FORMAT: */ - PyErr_NoMemory(); __PYX_ERR(1, 361, __pyx_L1_error) + PyErr_NoMemory(); __PYX_ERR(1, 363, __pyx_L1_error) + + /* "View.MemoryView":362 + * if self.lock is NULL: + * self.lock = PyThread_allocate_lock() + * if self.lock is NULL: # <<<<<<<<<<<<<< + * raise MemoryError + * + */ + } /* 
"View.MemoryView":360 - * if self.lock is NULL: - * self.lock = PyThread_allocate_lock() + * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] + * __pyx_memoryview_thread_locks_used += 1 * if self.lock is NULL: # <<<<<<<<<<<<<< - * raise MemoryError - * + * self.lock = PyThread_allocate_lock() + * if self.lock is NULL: */ } - /* "View.MemoryView":358 - * self.lock = __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] - * __pyx_memoryview_thread_locks_used += 1 - * if self.lock is NULL: # <<<<<<<<<<<<<< - * self.lock = PyThread_allocate_lock() - * if self.lock is NULL: + /* "View.MemoryView":355 + * Py_INCREF(Py_None) + * + * if not __PYX_CYTHON_ATOMICS_ENABLED(): # <<<<<<<<<<<<<< + * global __pyx_memoryview_thread_locks_used + * if __pyx_memoryview_thread_locks_used < THREAD_LOCKS_PREALLOCATED: */ } - /* "View.MemoryView":363 - * raise MemoryError + /* "View.MemoryView":365 + * raise MemoryError * * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') @@ -11368,7 +11431,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); if (__pyx_t_1) { - /* "View.MemoryView":364 + /* "View.MemoryView":366 * * if flags & PyBUF_FORMAT: * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') # <<<<<<<<<<<<<< @@ -11379,24 +11442,24 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ if (__pyx_t_2) { } else { __pyx_t_1 = __pyx_t_2; - goto __pyx_L11_bool_binop_done; + goto __pyx_L12_bool_binop_done; } __pyx_t_2 = (((__pyx_v_self->view.format[1]) == '\x00') != 0); __pyx_t_1 = __pyx_t_2; - __pyx_L11_bool_binop_done:; + __pyx_L12_bool_binop_done:; __pyx_v_self->dtype_is_object = __pyx_t_1; - /* "View.MemoryView":363 - * raise MemoryError + /* "View.MemoryView":365 + * raise MemoryError * * if flags & PyBUF_FORMAT: # 
<<<<<<<<<<<<<< * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') * else: */ - goto __pyx_L10; + goto __pyx_L11; } - /* "View.MemoryView":366 + /* "View.MemoryView":368 * self.dtype_is_object = (self.view.format[0] == b'O' and self.view.format[1] == b'\0') * else: * self.dtype_is_object = dtype_is_object # <<<<<<<<<<<<<< @@ -11406,9 +11469,9 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ /*else*/ { __pyx_v_self->dtype_is_object = __pyx_v_dtype_is_object; } - __pyx_L10:; + __pyx_L11:; - /* "View.MemoryView":368 + /* "View.MemoryView":370 * self.dtype_is_object = dtype_is_object * * self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer( # <<<<<<<<<<<<<< @@ -11417,7 +11480,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ */ __pyx_v_self->acquisition_count_aligned_p = ((__pyx_atomic_int *)__pyx_align_pointer(((void *)(&(__pyx_v_self->acquisition_count[0]))), (sizeof(__pyx_atomic_int)))); - /* "View.MemoryView":370 + /* "View.MemoryView":372 * self.acquisition_count_aligned_p = <__pyx_atomic_int *> align_pointer( * &self.acquisition_count[0], sizeof(__pyx_atomic_int)) * self.typeinfo = NULL # <<<<<<<<<<<<<< @@ -11426,7 +11489,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ */ __pyx_v_self->typeinfo = NULL; - /* "View.MemoryView":345 + /* "View.MemoryView":346 * cdef __Pyx_TypeInfo *typeinfo * * def __cinit__(memoryview self, object obj, int flags, bint dtype_is_object=False): # <<<<<<<<<<<<<< @@ -11445,7 +11508,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit_ return __pyx_r; } -/* "View.MemoryView":372 +/* "View.MemoryView":374 * self.typeinfo = NULL * * def __dealloc__(memoryview self): # <<<<<<<<<<<<<< @@ -11476,7 +11539,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal PyThread_type_lock __pyx_t_7; 
__Pyx_RefNannySetupContext("__dealloc__", 0); - /* "View.MemoryView":373 + /* "View.MemoryView":375 * * def __dealloc__(memoryview self): * if self.obj is not None: # <<<<<<<<<<<<<< @@ -11487,7 +11550,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":374 + /* "View.MemoryView":376 * def __dealloc__(memoryview self): * if self.obj is not None: * __Pyx_ReleaseBuffer(&self.view) # <<<<<<<<<<<<<< @@ -11496,7 +11559,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ __Pyx_ReleaseBuffer((&__pyx_v_self->view)); - /* "View.MemoryView":373 + /* "View.MemoryView":375 * * def __dealloc__(memoryview self): * if self.obj is not None: # <<<<<<<<<<<<<< @@ -11506,7 +11569,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal goto __pyx_L3; } - /* "View.MemoryView":375 + /* "View.MemoryView":377 * if self.obj is not None: * __Pyx_ReleaseBuffer(&self.view) * elif (<__pyx_buffer *> &self.view).obj == Py_None: # <<<<<<<<<<<<<< @@ -11516,7 +11579,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_2 = ((((Py_buffer *)(&__pyx_v_self->view))->obj == Py_None) != 0); if (__pyx_t_2) { - /* "View.MemoryView":377 + /* "View.MemoryView":379 * elif (<__pyx_buffer *> &self.view).obj == Py_None: * * (<__pyx_buffer *> &self.view).obj = NULL # <<<<<<<<<<<<<< @@ -11525,7 +11588,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ ((Py_buffer *)(&__pyx_v_self->view))->obj = NULL; - /* "View.MemoryView":378 + /* "View.MemoryView":380 * * (<__pyx_buffer *> &self.view).obj = NULL * Py_DECREF(Py_None) # <<<<<<<<<<<<<< @@ -11534,7 +11597,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ Py_DECREF(Py_None); - /* "View.MemoryView":375 + /* "View.MemoryView":377 * if self.obj is not None: * 
__Pyx_ReleaseBuffer(&self.view) * elif (<__pyx_buffer *> &self.view).obj == Py_None: # <<<<<<<<<<<<<< @@ -11544,7 +11607,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal } __pyx_L3:; - /* "View.MemoryView":382 + /* "View.MemoryView":384 * cdef int i * global __pyx_memoryview_thread_locks_used * if self.lock != NULL: # <<<<<<<<<<<<<< @@ -11554,7 +11617,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_2 = ((__pyx_v_self->lock != NULL) != 0); if (__pyx_t_2) { - /* "View.MemoryView":383 + /* "View.MemoryView":385 * global __pyx_memoryview_thread_locks_used * if self.lock != NULL: * for i in range(__pyx_memoryview_thread_locks_used): # <<<<<<<<<<<<<< @@ -11566,7 +11629,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "View.MemoryView":384 + /* "View.MemoryView":386 * if self.lock != NULL: * for i in range(__pyx_memoryview_thread_locks_used): * if __pyx_memoryview_thread_locks[i] is self.lock: # <<<<<<<<<<<<<< @@ -11576,7 +11639,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_2 = (((__pyx_memoryview_thread_locks[__pyx_v_i]) == __pyx_v_self->lock) != 0); if (__pyx_t_2) { - /* "View.MemoryView":385 + /* "View.MemoryView":387 * for i in range(__pyx_memoryview_thread_locks_used): * if __pyx_memoryview_thread_locks[i] is self.lock: * __pyx_memoryview_thread_locks_used -= 1 # <<<<<<<<<<<<<< @@ -11585,7 +11648,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ __pyx_memoryview_thread_locks_used = (__pyx_memoryview_thread_locks_used - 1); - /* "View.MemoryView":386 + /* "View.MemoryView":388 * if __pyx_memoryview_thread_locks[i] is self.lock: * __pyx_memoryview_thread_locks_used -= 1 * if i != __pyx_memoryview_thread_locks_used: # <<<<<<<<<<<<<< @@ -11595,7 +11658,7 @@ static void 
__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_2 = ((__pyx_v_i != __pyx_memoryview_thread_locks_used) != 0); if (__pyx_t_2) { - /* "View.MemoryView":388 + /* "View.MemoryView":390 * if i != __pyx_memoryview_thread_locks_used: * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) # <<<<<<<<<<<<<< @@ -11605,7 +11668,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __pyx_t_6 = (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]); __pyx_t_7 = (__pyx_memoryview_thread_locks[__pyx_v_i]); - /* "View.MemoryView":387 + /* "View.MemoryView":389 * __pyx_memoryview_thread_locks_used -= 1 * if i != __pyx_memoryview_thread_locks_used: * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( # <<<<<<<<<<<<<< @@ -11615,7 +11678,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal (__pyx_memoryview_thread_locks[__pyx_v_i]) = __pyx_t_6; (__pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used]) = __pyx_t_7; - /* "View.MemoryView":386 + /* "View.MemoryView":388 * if __pyx_memoryview_thread_locks[i] is self.lock: * __pyx_memoryview_thread_locks_used -= 1 * if i != __pyx_memoryview_thread_locks_used: # <<<<<<<<<<<<<< @@ -11624,7 +11687,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ } - /* "View.MemoryView":389 + /* "View.MemoryView":391 * __pyx_memoryview_thread_locks[i], __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used] = ( * __pyx_memoryview_thread_locks[__pyx_memoryview_thread_locks_used], __pyx_memoryview_thread_locks[i]) * break # <<<<<<<<<<<<<< @@ -11633,7 +11696,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ goto __pyx_L6_break; - /* "View.MemoryView":384 + 
/* "View.MemoryView":386 * if self.lock != NULL: * for i in range(__pyx_memoryview_thread_locks_used): * if __pyx_memoryview_thread_locks[i] is self.lock: # <<<<<<<<<<<<<< @@ -11644,7 +11707,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal } /*else*/ { - /* "View.MemoryView":391 + /* "View.MemoryView":393 * break * else: * PyThread_free_lock(self.lock) # <<<<<<<<<<<<<< @@ -11655,7 +11718,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal } __pyx_L6_break:; - /* "View.MemoryView":382 + /* "View.MemoryView":384 * cdef int i * global __pyx_memoryview_thread_locks_used * if self.lock != NULL: # <<<<<<<<<<<<<< @@ -11664,7 +11727,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal */ } - /* "View.MemoryView":372 + /* "View.MemoryView":374 * self.typeinfo = NULL * * def __dealloc__(memoryview self): # <<<<<<<<<<<<<< @@ -11676,7 +11739,7 @@ static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__deal __Pyx_RefNannyFinishContext(); } -/* "View.MemoryView":393 +/* "View.MemoryView":395 * PyThread_free_lock(self.lock) * * cdef char *get_item_pointer(memoryview self, object index) except NULL: # <<<<<<<<<<<<<< @@ -11702,7 +11765,7 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_item_pointer", 0); - /* "View.MemoryView":395 + /* "View.MemoryView":397 * cdef char *get_item_pointer(memoryview self, object index) except NULL: * cdef Py_ssize_t dim * cdef char *itemp = self.view.buf # <<<<<<<<<<<<<< @@ -11711,7 +11774,7 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py */ __pyx_v_itemp = ((char *)__pyx_v_self->view.buf); - /* "View.MemoryView":397 + /* "View.MemoryView":399 * cdef char *itemp = self.view.buf * * for dim, idx in enumerate(index): # <<<<<<<<<<<<<< @@ -11723,26 +11786,26 @@ static char 
*__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py __pyx_t_2 = __pyx_v_index; __Pyx_INCREF(__pyx_t_2); __pyx_t_3 = 0; __pyx_t_4 = NULL; } else { - __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_3 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 399, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 399, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_4)) { if (likely(PyList_CheckExact(__pyx_t_2))) { if (__pyx_t_3 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(1, 399, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 399, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { if (__pyx_t_3 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_3); __Pyx_INCREF(__pyx_t_5); __pyx_t_3++; if (unlikely(0 < 0)) __PYX_ERR(1, 399, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 397, __pyx_L1_error) + __pyx_t_5 = 
PySequence_ITEM(__pyx_t_2, __pyx_t_3); __pyx_t_3++; if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 399, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } @@ -11752,7 +11815,7 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 397, __pyx_L1_error) + else __PYX_ERR(1, 399, __pyx_L1_error) } break; } @@ -11763,18 +11826,18 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py __pyx_v_dim = __pyx_t_1; __pyx_t_1 = (__pyx_t_1 + 1); - /* "View.MemoryView":398 + /* "View.MemoryView":400 * * for dim, idx in enumerate(index): * itemp = pybuffer_index(&self.view, itemp, idx, dim) # <<<<<<<<<<<<<< * * return itemp */ - __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 398, __pyx_L1_error) - __pyx_t_7 = __pyx_pybuffer_index((&__pyx_v_self->view), __pyx_v_itemp, __pyx_t_6, __pyx_v_dim); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(1, 398, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_idx); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 400, __pyx_L1_error) + __pyx_t_7 = __pyx_pybuffer_index((&__pyx_v_self->view), __pyx_v_itemp, __pyx_t_6, __pyx_v_dim); if (unlikely(__pyx_t_7 == ((char *)NULL))) __PYX_ERR(1, 400, __pyx_L1_error) __pyx_v_itemp = __pyx_t_7; - /* "View.MemoryView":397 + /* "View.MemoryView":399 * cdef char *itemp = self.view.buf * * for dim, idx in enumerate(index): # <<<<<<<<<<<<<< @@ -11784,7 +11847,7 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":400 + /* "View.MemoryView":402 * itemp = pybuffer_index(&self.view, itemp, idx, dim) * * return itemp # <<<<<<<<<<<<<< @@ -11794,7 +11857,7 @@ static char 
*__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py __pyx_r = __pyx_v_itemp; goto __pyx_L0; - /* "View.MemoryView":393 + /* "View.MemoryView":395 * PyThread_free_lock(self.lock) * * cdef char *get_item_pointer(memoryview self, object index) except NULL: # <<<<<<<<<<<<<< @@ -11814,7 +11877,7 @@ static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__py return __pyx_r; } -/* "View.MemoryView":403 +/* "View.MemoryView":405 * * * def __getitem__(memoryview self, object index): # <<<<<<<<<<<<<< @@ -11852,7 +11915,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__getitem__", 0); - /* "View.MemoryView":404 + /* "View.MemoryView":406 * * def __getitem__(memoryview self, object index): * if index is Ellipsis: # <<<<<<<<<<<<<< @@ -11863,7 +11926,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":405 + /* "View.MemoryView":407 * def __getitem__(memoryview self, object index): * if index is Ellipsis: * return self # <<<<<<<<<<<<<< @@ -11875,7 +11938,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ __pyx_r = ((PyObject *)__pyx_v_self); goto __pyx_L0; - /* "View.MemoryView":404 + /* "View.MemoryView":406 * * def __getitem__(memoryview self, object index): * if index is Ellipsis: # <<<<<<<<<<<<<< @@ -11884,14 +11947,14 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ */ } - /* "View.MemoryView":407 + /* "View.MemoryView":409 * return self * * have_slices, indices = _unellipsify(index, self.view.ndim) # <<<<<<<<<<<<<< * * cdef char *itemp */ - __pyx_t_3 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 407, __pyx_L1_error) + __pyx_t_3 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 409, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (likely(__pyx_t_3 != Py_None)) { PyObject* sequence = __pyx_t_3; @@ -11899,7 +11962,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(1, 407, __pyx_L1_error) + __PYX_ERR(1, 409, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); @@ -11907,31 +11970,31 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(__pyx_t_5); #else - __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 407, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 409, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 407, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 409, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 407, __pyx_L1_error) + __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 409, __pyx_L1_error) } __pyx_v_have_slices = __pyx_t_4; __pyx_t_4 = 0; __pyx_v_indices = __pyx_t_5; __pyx_t_5 = 0; - /* "View.MemoryView":410 + /* "View.MemoryView":412 * * cdef char *itemp * if have_slices: # <<<<<<<<<<<<<< * return memview_slice(self, indices) * else: */ - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(1, 410, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(1, 412, __pyx_L1_error) if (__pyx_t_2) { - /* "View.MemoryView":411 + /* "View.MemoryView":413 * cdef char *itemp * if have_slices: * return memview_slice(self, indices) # 
<<<<<<<<<<<<<< @@ -11939,13 +12002,13 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ * itemp = self.get_item_pointer(indices) */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = ((PyObject *)__pyx_memview_slice(__pyx_v_self, __pyx_v_indices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 411, __pyx_L1_error) + __pyx_t_3 = ((PyObject *)__pyx_memview_slice(__pyx_v_self, __pyx_v_indices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 413, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; goto __pyx_L0; - /* "View.MemoryView":410 + /* "View.MemoryView":412 * * cdef char *itemp * if have_slices: # <<<<<<<<<<<<<< @@ -11954,7 +12017,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ */ } - /* "View.MemoryView":413 + /* "View.MemoryView":415 * return memview_slice(self, indices) * else: * itemp = self.get_item_pointer(indices) # <<<<<<<<<<<<<< @@ -11962,10 +12025,10 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ * */ /*else*/ { - __pyx_t_6 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_indices); if (unlikely(__pyx_t_6 == ((char *)NULL))) __PYX_ERR(1, 413, __pyx_L1_error) + __pyx_t_6 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_indices); if (unlikely(__pyx_t_6 == ((char *)NULL))) __PYX_ERR(1, 415, __pyx_L1_error) __pyx_v_itemp = __pyx_t_6; - /* "View.MemoryView":414 + /* "View.MemoryView":416 * else: * itemp = self.get_item_pointer(indices) * return self.convert_item_to_object(itemp) # <<<<<<<<<<<<<< @@ -11973,14 +12036,14 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ * def __setitem__(memoryview self, object index, object value): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->convert_item_to_object(__pyx_v_self, __pyx_v_itemp); if 
(unlikely(!__pyx_t_3)) __PYX_ERR(1, 414, __pyx_L1_error) + __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->convert_item_to_object(__pyx_v_self, __pyx_v_itemp); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 416, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_r = __pyx_t_3; __pyx_t_3 = 0; goto __pyx_L0; } - /* "View.MemoryView":403 + /* "View.MemoryView":405 * * * def __getitem__(memoryview self, object index): # <<<<<<<<<<<<<< @@ -12003,7 +12066,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4_ return __pyx_r; } -/* "View.MemoryView":416 +/* "View.MemoryView":418 * return self.convert_item_to_object(itemp) * * def __setitem__(memoryview self, object index, object value): # <<<<<<<<<<<<<< @@ -12039,7 +12102,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit __Pyx_RefNannySetupContext("__setitem__", 0); __Pyx_INCREF(__pyx_v_index); - /* "View.MemoryView":417 + /* "View.MemoryView":419 * * def __setitem__(memoryview self, object index, object value): * if self.view.readonly: # <<<<<<<<<<<<<< @@ -12049,20 +12112,20 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit __pyx_t_1 = (__pyx_v_self->view.readonly != 0); if (unlikely(__pyx_t_1)) { - /* "View.MemoryView":418 + /* "View.MemoryView":420 * def __setitem__(memoryview self, object index, object value): * if self.view.readonly: * raise TypeError("Cannot assign to read-only memoryview") # <<<<<<<<<<<<<< * * have_slices, index = _unellipsify(index, self.view.ndim) */ - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__23, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 418, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_tuple__23, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 420, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(1, 418, __pyx_L1_error) + __PYX_ERR(1, 
420, __pyx_L1_error) - /* "View.MemoryView":417 + /* "View.MemoryView":419 * * def __setitem__(memoryview self, object index, object value): * if self.view.readonly: # <<<<<<<<<<<<<< @@ -12071,14 +12134,14 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit */ } - /* "View.MemoryView":420 + /* "View.MemoryView":422 * raise TypeError("Cannot assign to read-only memoryview") * * have_slices, index = _unellipsify(index, self.view.ndim) # <<<<<<<<<<<<<< * * if have_slices: */ - __pyx_t_2 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 420, __pyx_L1_error) + __pyx_t_2 = _unellipsify(__pyx_v_index, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (likely(__pyx_t_2 != Py_None)) { PyObject* sequence = __pyx_t_2; @@ -12086,7 +12149,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit if (unlikely(size != 2)) { if (size > 2) __Pyx_RaiseTooManyValuesError(2); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(1, 420, __pyx_L1_error) + __PYX_ERR(1, 422, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); @@ -12094,67 +12157,67 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); #else - __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 420, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 420, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 422, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } else { - __Pyx_RaiseNoneNotIterableError(); 
__PYX_ERR(1, 420, __pyx_L1_error) + __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 422, __pyx_L1_error) } __pyx_v_have_slices = __pyx_t_3; __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_index, __pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":422 + /* "View.MemoryView":424 * have_slices, index = _unellipsify(index, self.view.ndim) * * if have_slices: # <<<<<<<<<<<<<< * obj = self.is_slice(value) * if obj: */ - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 422, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_have_slices); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 424, __pyx_L1_error) if (__pyx_t_1) { - /* "View.MemoryView":423 + /* "View.MemoryView":425 * * if have_slices: * obj = self.is_slice(value) # <<<<<<<<<<<<<< * if obj: * self.setitem_slice_assignment(self[index], obj) */ - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->is_slice(__pyx_v_self, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 423, __pyx_L1_error) + __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->is_slice(__pyx_v_self, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 425, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_v_obj = __pyx_t_2; __pyx_t_2 = 0; - /* "View.MemoryView":424 + /* "View.MemoryView":426 * if have_slices: * obj = self.is_slice(value) * if obj: # <<<<<<<<<<<<<< * self.setitem_slice_assignment(self[index], obj) * else: */ - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_obj); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 424, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_obj); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 426, __pyx_L1_error) if (__pyx_t_1) { - /* "View.MemoryView":425 + /* "View.MemoryView":427 * obj = self.is_slice(value) * if obj: * self.setitem_slice_assignment(self[index], obj) # <<<<<<<<<<<<<< * else: * self.setitem_slice_assign_scalar(self[index], value) */ - __pyx_t_2 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), 
__pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 425, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_4 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assignment(__pyx_v_self, __pyx_t_2, __pyx_v_obj); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 425, __pyx_L1_error) + __pyx_t_4 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assignment(__pyx_v_self, __pyx_t_2, __pyx_v_obj); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 427, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":424 + /* "View.MemoryView":426 * if have_slices: * obj = self.is_slice(value) * if obj: # <<<<<<<<<<<<<< @@ -12164,7 +12227,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit goto __pyx_L5; } - /* "View.MemoryView":427 + /* "View.MemoryView":429 * self.setitem_slice_assignment(self[index], obj) * else: * self.setitem_slice_assign_scalar(self[index], value) # <<<<<<<<<<<<<< @@ -12172,17 +12235,17 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit * self.setitem_indexed(index, value) */ /*else*/ { - __pyx_t_4 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 427, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_self), __pyx_v_index); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_memoryview_type))))) __PYX_ERR(1, 427, __pyx_L1_error) - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assign_scalar(__pyx_v_self, ((struct __pyx_memoryview_obj *)__pyx_t_4), __pyx_v_value); if (unlikely(!__pyx_t_2)) 
__PYX_ERR(1, 427, __pyx_L1_error) + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_memoryview_type))))) __PYX_ERR(1, 429, __pyx_L1_error) + __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_slice_assign_scalar(__pyx_v_self, ((struct __pyx_memoryview_obj *)__pyx_t_4), __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } __pyx_L5:; - /* "View.MemoryView":422 + /* "View.MemoryView":424 * have_slices, index = _unellipsify(index, self.view.ndim) * * if have_slices: # <<<<<<<<<<<<<< @@ -12192,7 +12255,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit goto __pyx_L4; } - /* "View.MemoryView":429 + /* "View.MemoryView":431 * self.setitem_slice_assign_scalar(self[index], value) * else: * self.setitem_indexed(index, value) # <<<<<<<<<<<<<< @@ -12200,13 +12263,13 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit * cdef is_slice(self, obj): */ /*else*/ { - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_indexed(__pyx_v_self, __pyx_v_index, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 429, __pyx_L1_error) + __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->setitem_indexed(__pyx_v_self, __pyx_v_index, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } __pyx_L4:; - /* "View.MemoryView":416 + /* "View.MemoryView":418 * return self.convert_item_to_object(itemp) * * def __setitem__(memoryview self, object index, object value): # <<<<<<<<<<<<<< @@ -12231,7 +12294,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setit return __pyx_r; } -/* "View.MemoryView":431 +/* "View.MemoryView":433 * self.setitem_indexed(index, value) * * 
cdef is_slice(self, obj): # <<<<<<<<<<<<<< @@ -12257,7 +12320,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __Pyx_RefNannySetupContext("is_slice", 0); __Pyx_INCREF(__pyx_v_obj); - /* "View.MemoryView":432 + /* "View.MemoryView":434 * * cdef is_slice(self, obj): * if not isinstance(obj, memoryview): # <<<<<<<<<<<<<< @@ -12268,7 +12331,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":433 + /* "View.MemoryView":435 * cdef is_slice(self, obj): * if not isinstance(obj, memoryview): * try: # <<<<<<<<<<<<<< @@ -12284,34 +12347,34 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __Pyx_XGOTREF(__pyx_t_5); /*try:*/ { - /* "View.MemoryView":434 + /* "View.MemoryView":436 * if not isinstance(obj, memoryview): * try: * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, # <<<<<<<<<<<<<< * self.dtype_is_object) * except TypeError: */ - __pyx_t_6 = __Pyx_PyInt_From_int(((__pyx_v_self->flags & (~PyBUF_WRITABLE)) | PyBUF_ANY_CONTIGUOUS)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 434, __pyx_L4_error) + __pyx_t_6 = __Pyx_PyInt_From_int(((__pyx_v_self->flags & (~PyBUF_WRITABLE)) | PyBUF_ANY_CONTIGUOUS)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 436, __pyx_L4_error) __Pyx_GOTREF(__pyx_t_6); - /* "View.MemoryView":435 + /* "View.MemoryView":437 * try: * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, * self.dtype_is_object) # <<<<<<<<<<<<<< * except TypeError: * return None */ - __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 435, __pyx_L4_error) + __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_self->dtype_is_object); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 437, __pyx_L4_error) __Pyx_GOTREF(__pyx_t_7); - /* "View.MemoryView":434 + /* "View.MemoryView":436 * if not isinstance(obj, memoryview): * try: * 
obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, # <<<<<<<<<<<<<< * self.dtype_is_object) * except TypeError: */ - __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 434, __pyx_L4_error) + __pyx_t_8 = PyTuple_New(3); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 436, __pyx_L4_error) __Pyx_GOTREF(__pyx_t_8); __Pyx_INCREF(__pyx_v_obj); __Pyx_GIVEREF(__pyx_v_obj); @@ -12322,13 +12385,13 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ PyTuple_SET_ITEM(__pyx_t_8, 2, __pyx_t_7); __pyx_t_6 = 0; __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_8, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 434, __pyx_L4_error) + __pyx_t_7 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_8, NULL); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 436, __pyx_L4_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF_SET(__pyx_v_obj, __pyx_t_7); __pyx_t_7 = 0; - /* "View.MemoryView":433 + /* "View.MemoryView":435 * cdef is_slice(self, obj): * if not isinstance(obj, memoryview): * try: # <<<<<<<<<<<<<< @@ -12345,7 +12408,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "View.MemoryView":436 + /* "View.MemoryView":438 * obj = memoryview(obj, self.flags & ~PyBUF_WRITABLE | PyBUF_ANY_CONTIGUOUS, * self.dtype_is_object) * except TypeError: # <<<<<<<<<<<<<< @@ -12355,12 +12418,12 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __pyx_t_9 = __Pyx_PyErr_ExceptionMatches(__pyx_builtin_TypeError); if (__pyx_t_9) { __Pyx_AddTraceback("View.MemoryView.memoryview.is_slice", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_6) < 0) __PYX_ERR(1, 436, __pyx_L6_except_error) + if (__Pyx_GetException(&__pyx_t_7, &__pyx_t_8, &__pyx_t_6) < 0) 
__PYX_ERR(1, 438, __pyx_L6_except_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_GOTREF(__pyx_t_8); __Pyx_GOTREF(__pyx_t_6); - /* "View.MemoryView":437 + /* "View.MemoryView":439 * self.dtype_is_object) * except TypeError: * return None # <<<<<<<<<<<<<< @@ -12377,7 +12440,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ goto __pyx_L6_except_error; __pyx_L6_except_error:; - /* "View.MemoryView":433 + /* "View.MemoryView":435 * cdef is_slice(self, obj): * if not isinstance(obj, memoryview): * try: # <<<<<<<<<<<<<< @@ -12398,7 +12461,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __pyx_L9_try_end:; } - /* "View.MemoryView":432 + /* "View.MemoryView":434 * * cdef is_slice(self, obj): * if not isinstance(obj, memoryview): # <<<<<<<<<<<<<< @@ -12407,7 +12470,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ */ } - /* "View.MemoryView":439 + /* "View.MemoryView":441 * return None * * return obj # <<<<<<<<<<<<<< @@ -12419,7 +12482,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ __pyx_r = __pyx_v_obj; goto __pyx_L0; - /* "View.MemoryView":431 + /* "View.MemoryView":433 * self.setitem_indexed(index, value) * * cdef is_slice(self, obj): # <<<<<<<<<<<<<< @@ -12441,7 +12504,7 @@ static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_ return __pyx_r; } -/* "View.MemoryView":441 +/* "View.MemoryView":443 * return obj * * cdef setitem_slice_assignment(self, dst, src): # <<<<<<<<<<<<<< @@ -12465,52 +12528,52 @@ static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryvi int __pyx_clineno = 0; __Pyx_RefNannySetupContext("setitem_slice_assignment", 0); - /* "View.MemoryView":445 + /* "View.MemoryView":447 * cdef __Pyx_memviewslice src_slice * * memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0], # <<<<<<<<<<<<<< * get_slice_from_memview(dst, &dst_slice)[0], * src.ndim, dst.ndim, 
self.dtype_is_object) */ - if (!(likely(((__pyx_v_src) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_src, __pyx_memoryview_type))))) __PYX_ERR(1, 445, __pyx_L1_error) - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_src), (&__pyx_v_src_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 445, __pyx_L1_error) + if (!(likely(((__pyx_v_src) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_src, __pyx_memoryview_type))))) __PYX_ERR(1, 447, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_src), (&__pyx_v_src_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 447, __pyx_L1_error) - /* "View.MemoryView":446 + /* "View.MemoryView":448 * * memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0], * get_slice_from_memview(dst, &dst_slice)[0], # <<<<<<<<<<<<<< * src.ndim, dst.ndim, self.dtype_is_object) * */ - if (!(likely(((__pyx_v_dst) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_dst, __pyx_memoryview_type))))) __PYX_ERR(1, 446, __pyx_L1_error) - __pyx_t_2 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_dst), (&__pyx_v_dst_slice)); if (unlikely(__pyx_t_2 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 446, __pyx_L1_error) + if (!(likely(((__pyx_v_dst) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_dst, __pyx_memoryview_type))))) __PYX_ERR(1, 448, __pyx_L1_error) + __pyx_t_2 = __pyx_memoryview_get_slice_from_memoryview(((struct __pyx_memoryview_obj *)__pyx_v_dst), (&__pyx_v_dst_slice)); if (unlikely(__pyx_t_2 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 448, __pyx_L1_error) - /* "View.MemoryView":447 + /* "View.MemoryView":449 * memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0], * get_slice_from_memview(dst, &dst_slice)[0], * src.ndim, dst.ndim, self.dtype_is_object) # <<<<<<<<<<<<<< * * cdef setitem_slice_assign_scalar(self, memoryview dst, value): */ - 
__pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_src, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 447, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_src, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 447, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_4 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 449, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_dst, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 447, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_dst, __pyx_n_s_ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 449, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 447, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 449, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":445 + /* "View.MemoryView":447 * cdef __Pyx_memviewslice src_slice * * memoryview_copy_contents(get_slice_from_memview(src, &src_slice)[0], # <<<<<<<<<<<<<< * get_slice_from_memview(dst, &dst_slice)[0], * src.ndim, dst.ndim, self.dtype_is_object) */ - __pyx_t_6 = __pyx_memoryview_copy_contents((__pyx_t_1[0]), (__pyx_t_2[0]), __pyx_t_4, __pyx_t_5, __pyx_v_self->dtype_is_object); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 445, __pyx_L1_error) + __pyx_t_6 = __pyx_memoryview_copy_contents((__pyx_t_1[0]), (__pyx_t_2[0]), __pyx_t_4, __pyx_t_5, __pyx_v_self->dtype_is_object); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 447, __pyx_L1_error) - /* "View.MemoryView":441 + /* "View.MemoryView":443 * return obj * * cdef setitem_slice_assignment(self, dst, src): # 
<<<<<<<<<<<<<< @@ -12531,7 +12594,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryvi return __pyx_r; } -/* "View.MemoryView":449 +/* "View.MemoryView":451 * src.ndim, dst.ndim, self.dtype_is_object) * * cdef setitem_slice_assign_scalar(self, memoryview dst, value): # <<<<<<<<<<<<<< @@ -12564,7 +12627,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor int __pyx_clineno = 0; __Pyx_RefNannySetupContext("setitem_slice_assign_scalar", 0); - /* "View.MemoryView":451 + /* "View.MemoryView":453 * cdef setitem_slice_assign_scalar(self, memoryview dst, value): * cdef int array[128] * cdef void *tmp = NULL # <<<<<<<<<<<<<< @@ -12573,17 +12636,17 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ __pyx_v_tmp = NULL; - /* "View.MemoryView":456 + /* "View.MemoryView":458 * cdef __Pyx_memviewslice *dst_slice * cdef __Pyx_memviewslice tmp_slice * dst_slice = get_slice_from_memview(dst, &tmp_slice) # <<<<<<<<<<<<<< * * if self.view.itemsize > sizeof(array): */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_dst, (&__pyx_v_tmp_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 456, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_dst, (&__pyx_v_tmp_slice)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 458, __pyx_L1_error) __pyx_v_dst_slice = __pyx_t_1; - /* "View.MemoryView":458 + /* "View.MemoryView":460 * dst_slice = get_slice_from_memview(dst, &tmp_slice) * * if self.view.itemsize > sizeof(array): # <<<<<<<<<<<<<< @@ -12593,7 +12656,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_t_2 = ((((size_t)__pyx_v_self->view.itemsize) > (sizeof(__pyx_v_array))) != 0); if (__pyx_t_2) { - /* "View.MemoryView":459 + /* "View.MemoryView":461 * * if self.view.itemsize > sizeof(array): * tmp = PyMem_Malloc(self.view.itemsize) # <<<<<<<<<<<<<< @@ 
-12602,7 +12665,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ __pyx_v_tmp = PyMem_Malloc(__pyx_v_self->view.itemsize); - /* "View.MemoryView":460 + /* "View.MemoryView":462 * if self.view.itemsize > sizeof(array): * tmp = PyMem_Malloc(self.view.itemsize) * if tmp == NULL: # <<<<<<<<<<<<<< @@ -12612,16 +12675,16 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_t_2 = ((__pyx_v_tmp == NULL) != 0); if (unlikely(__pyx_t_2)) { - /* "View.MemoryView":461 + /* "View.MemoryView":463 * tmp = PyMem_Malloc(self.view.itemsize) * if tmp == NULL: * raise MemoryError # <<<<<<<<<<<<<< * item = tmp * else: */ - PyErr_NoMemory(); __PYX_ERR(1, 461, __pyx_L1_error) + PyErr_NoMemory(); __PYX_ERR(1, 463, __pyx_L1_error) - /* "View.MemoryView":460 + /* "View.MemoryView":462 * if self.view.itemsize > sizeof(array): * tmp = PyMem_Malloc(self.view.itemsize) * if tmp == NULL: # <<<<<<<<<<<<<< @@ -12630,7 +12693,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ } - /* "View.MemoryView":462 + /* "View.MemoryView":464 * if tmp == NULL: * raise MemoryError * item = tmp # <<<<<<<<<<<<<< @@ -12639,7 +12702,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ __pyx_v_item = __pyx_v_tmp; - /* "View.MemoryView":458 + /* "View.MemoryView":460 * dst_slice = get_slice_from_memview(dst, &tmp_slice) * * if self.view.itemsize > sizeof(array): # <<<<<<<<<<<<<< @@ -12649,7 +12712,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor goto __pyx_L3; } - /* "View.MemoryView":464 + /* "View.MemoryView":466 * item = tmp * else: * item = array # <<<<<<<<<<<<<< @@ -12661,7 +12724,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor } __pyx_L3:; - /* "View.MemoryView":466 + /* "View.MemoryView":468 * item = array * * try: # <<<<<<<<<<<<<< @@ -12670,7 +12733,7 @@ static PyObject 
*__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ /*try:*/ { - /* "View.MemoryView":467 + /* "View.MemoryView":469 * * try: * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -12680,7 +12743,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_t_2 = (__pyx_v_self->dtype_is_object != 0); if (__pyx_t_2) { - /* "View.MemoryView":468 + /* "View.MemoryView":470 * try: * if self.dtype_is_object: * ( item)[0] = value # <<<<<<<<<<<<<< @@ -12689,7 +12752,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ (((PyObject **)__pyx_v_item)[0]) = ((PyObject *)__pyx_v_value); - /* "View.MemoryView":467 + /* "View.MemoryView":469 * * try: * if self.dtype_is_object: # <<<<<<<<<<<<<< @@ -12699,7 +12762,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor goto __pyx_L8; } - /* "View.MemoryView":470 + /* "View.MemoryView":472 * ( item)[0] = value * else: * self.assign_item_from_object( item, value) # <<<<<<<<<<<<<< @@ -12707,13 +12770,13 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor * */ /*else*/ { - __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, ((char *)__pyx_v_item), __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 470, __pyx_L6_error) + __pyx_t_3 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, ((char *)__pyx_v_item), __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 472, __pyx_L6_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } __pyx_L8:; - /* "View.MemoryView":474 + /* "View.MemoryView":476 * * * if self.view.suboffsets != NULL: # <<<<<<<<<<<<<< @@ -12723,18 +12786,18 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_t_2 = ((__pyx_v_self->view.suboffsets != NULL) != 0); if (__pyx_t_2) { - /* "View.MemoryView":475 + /* 
"View.MemoryView":477 * * if self.view.suboffsets != NULL: * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) # <<<<<<<<<<<<<< * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, * item, self.dtype_is_object) */ - __pyx_t_3 = assert_direct_dimensions(__pyx_v_self->view.suboffsets, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 475, __pyx_L6_error) + __pyx_t_3 = assert_direct_dimensions(__pyx_v_self->view.suboffsets, __pyx_v_self->view.ndim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 477, __pyx_L6_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":474 + /* "View.MemoryView":476 * * * if self.view.suboffsets != NULL: # <<<<<<<<<<<<<< @@ -12743,7 +12806,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor */ } - /* "View.MemoryView":476 + /* "View.MemoryView":478 * if self.view.suboffsets != NULL: * assert_direct_dimensions(self.view.suboffsets, self.view.ndim) * slice_assign_scalar(dst_slice, dst.view.ndim, self.view.itemsize, # <<<<<<<<<<<<<< @@ -12753,7 +12816,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_memoryview_slice_assign_scalar(__pyx_v_dst_slice, __pyx_v_dst->view.ndim, __pyx_v_self->view.itemsize, __pyx_v_item, __pyx_v_self->dtype_is_object); } - /* "View.MemoryView":479 + /* "View.MemoryView":481 * item, self.dtype_is_object) * finally: * PyMem_Free(tmp) # <<<<<<<<<<<<<< @@ -12800,7 +12863,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor __pyx_L7:; } - /* "View.MemoryView":449 + /* "View.MemoryView":451 * src.ndim, dst.ndim, self.dtype_is_object) * * cdef setitem_slice_assign_scalar(self, memoryview dst, value): # <<<<<<<<<<<<<< @@ -12821,7 +12884,7 @@ static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memor return __pyx_r; } -/* "View.MemoryView":481 +/* "View.MemoryView":483 * PyMem_Free(tmp) * * cdef setitem_indexed(self, 
index, value): # <<<<<<<<<<<<<< @@ -12840,28 +12903,28 @@ static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("setitem_indexed", 0); - /* "View.MemoryView":482 + /* "View.MemoryView":484 * * cdef setitem_indexed(self, index, value): * cdef char *itemp = self.get_item_pointer(index) # <<<<<<<<<<<<<< * self.assign_item_from_object(itemp, value) * */ - __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_index); if (unlikely(__pyx_t_1 == ((char *)NULL))) __PYX_ERR(1, 482, __pyx_L1_error) + __pyx_t_1 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->get_item_pointer(__pyx_v_self, __pyx_v_index); if (unlikely(__pyx_t_1 == ((char *)NULL))) __PYX_ERR(1, 484, __pyx_L1_error) __pyx_v_itemp = __pyx_t_1; - /* "View.MemoryView":483 + /* "View.MemoryView":485 * cdef setitem_indexed(self, index, value): * cdef char *itemp = self.get_item_pointer(index) * self.assign_item_from_object(itemp, value) # <<<<<<<<<<<<<< * * cdef convert_item_to_object(self, char *itemp): */ - __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 483, __pyx_L1_error) + __pyx_t_2 = ((struct __pyx_vtabstruct_memoryview *)__pyx_v_self->__pyx_vtab)->assign_item_from_object(__pyx_v_self, __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 485, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":481 + /* "View.MemoryView":483 * PyMem_Free(tmp) * * cdef setitem_indexed(self, index, value): # <<<<<<<<<<<<<< @@ -12882,7 +12945,7 @@ static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *_ return __pyx_r; } -/* "View.MemoryView":485 +/* "View.MemoryView":487 * self.assign_item_from_object(itemp, value) * * cdef 
convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< @@ -12912,31 +12975,31 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview int __pyx_clineno = 0; __Pyx_RefNannySetupContext("convert_item_to_object", 0); - /* "View.MemoryView":488 + /* "View.MemoryView":490 * """Only used if instantiated manually by the user, or if Cython doesn't * know how to convert the type""" * import struct # <<<<<<<<<<<<<< * cdef bytes bytesitem * */ - __pyx_t_1 = __Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 488, __pyx_L1_error) + __pyx_t_1 = __Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 490, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_struct = __pyx_t_1; __pyx_t_1 = 0; - /* "View.MemoryView":491 + /* "View.MemoryView":493 * cdef bytes bytesitem * * bytesitem = itemp[:self.view.itemsize] # <<<<<<<<<<<<<< * try: * result = struct.unpack(self.view.format, bytesitem) */ - __pyx_t_1 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_itemp + 0, __pyx_v_self->view.itemsize - 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 491, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_itemp + 0, __pyx_v_self->view.itemsize - 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 493, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_bytesitem = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "View.MemoryView":492 + /* "View.MemoryView":494 * * bytesitem = itemp[:self.view.itemsize] * try: # <<<<<<<<<<<<<< @@ -12952,16 +13015,16 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __Pyx_XGOTREF(__pyx_t_4); /*try:*/ { - /* "View.MemoryView":493 + /* "View.MemoryView":495 * bytesitem = itemp[:self.view.itemsize] * try: * result = struct.unpack(self.view.format, bytesitem) # <<<<<<<<<<<<<< * except struct.error: * raise ValueError("Unable to convert item to object") */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_unpack); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 
493, __pyx_L3_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_unpack); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 495, __pyx_L3_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 493, __pyx_L3_error) + __pyx_t_6 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 495, __pyx_L3_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_7 = NULL; __pyx_t_8 = 0; @@ -12978,7 +13041,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_5)) { PyObject *__pyx_temp[3] = {__pyx_t_7, __pyx_t_6, __pyx_v_bytesitem}; - __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 493, __pyx_L3_error) + __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 495, __pyx_L3_error) __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; @@ -12987,14 +13050,14 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_5)) { PyObject *__pyx_temp[3] = {__pyx_t_7, __pyx_t_6, __pyx_v_bytesitem}; - __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 493, __pyx_L3_error) + __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_8, 2+__pyx_t_8); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 495, __pyx_L3_error) __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } else #endif { - __pyx_t_9 = PyTuple_New(2+__pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 493, __pyx_L3_error) + __pyx_t_9 = PyTuple_New(2+__pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 495, __pyx_L3_error) 
__Pyx_GOTREF(__pyx_t_9); if (__pyx_t_7) { __Pyx_GIVEREF(__pyx_t_7); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_t_7); __pyx_t_7 = NULL; @@ -13005,7 +13068,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __Pyx_GIVEREF(__pyx_v_bytesitem); PyTuple_SET_ITEM(__pyx_t_9, 1+__pyx_t_8, __pyx_v_bytesitem); __pyx_t_6 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 493, __pyx_L3_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 495, __pyx_L3_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } @@ -13013,7 +13076,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __pyx_v_result = __pyx_t_1; __pyx_t_1 = 0; - /* "View.MemoryView":492 + /* "View.MemoryView":494 * * bytesitem = itemp[:self.view.itemsize] * try: # <<<<<<<<<<<<<< @@ -13022,7 +13085,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview */ } - /* "View.MemoryView":497 + /* "View.MemoryView":499 * raise ValueError("Unable to convert item to object") * else: * if len(self.view.format) == 1: # <<<<<<<<<<<<<< @@ -13034,7 +13097,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __pyx_t_11 = ((__pyx_t_10 == 1) != 0); if (__pyx_t_11) { - /* "View.MemoryView":498 + /* "View.MemoryView":500 * else: * if len(self.view.format) == 1: * return result[0] # <<<<<<<<<<<<<< @@ -13042,13 +13105,13 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_result, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 498, __pyx_L5_except_error) + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_result, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 500, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; 
__pyx_t_1 = 0; goto __pyx_L6_except_return; - /* "View.MemoryView":497 + /* "View.MemoryView":499 * raise ValueError("Unable to convert item to object") * else: * if len(self.view.format) == 1: # <<<<<<<<<<<<<< @@ -13057,7 +13120,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview */ } - /* "View.MemoryView":499 + /* "View.MemoryView":501 * if len(self.view.format) == 1: * return result[0] * return result # <<<<<<<<<<<<<< @@ -13076,7 +13139,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; - /* "View.MemoryView":494 + /* "View.MemoryView":496 * try: * result = struct.unpack(self.view.format, bytesitem) * except struct.error: # <<<<<<<<<<<<<< @@ -13084,7 +13147,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview * else: */ __Pyx_ErrFetch(&__pyx_t_1, &__pyx_t_5, &__pyx_t_9); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_error); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 494, __pyx_L5_except_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_error); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 496, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_8 = __Pyx_PyErr_GivenExceptionMatches(__pyx_t_1, __pyx_t_6); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; @@ -13092,28 +13155,28 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview __pyx_t_1 = 0; __pyx_t_5 = 0; __pyx_t_9 = 0; if (__pyx_t_8) { __Pyx_AddTraceback("View.MemoryView.memoryview.convert_item_to_object", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_5, &__pyx_t_1) < 0) __PYX_ERR(1, 494, __pyx_L5_except_error) + if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_5, &__pyx_t_1) < 0) __PYX_ERR(1, 496, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_GOTREF(__pyx_t_5); __Pyx_GOTREF(__pyx_t_1); - /* "View.MemoryView":495 + /* 
"View.MemoryView":497 * result = struct.unpack(self.view.format, bytesitem) * except struct.error: * raise ValueError("Unable to convert item to object") # <<<<<<<<<<<<<< * else: * if len(self.view.format) == 1: */ - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__24, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 495, __pyx_L5_except_error) + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__24, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 497, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_Raise(__pyx_t_6, 0, 0, 0); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __PYX_ERR(1, 495, __pyx_L5_except_error) + __PYX_ERR(1, 497, __pyx_L5_except_error) } goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "View.MemoryView":492 + /* "View.MemoryView":494 * * bytesitem = itemp[:self.view.itemsize] * try: # <<<<<<<<<<<<<< @@ -13133,7 +13196,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview goto __pyx_L0; } - /* "View.MemoryView":485 + /* "View.MemoryView":487 * self.assign_item_from_object(itemp, value) * * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< @@ -13159,7 +13222,7 @@ static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview return __pyx_r; } -/* "View.MemoryView":501 +/* "View.MemoryView":503 * return result * * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< @@ -13193,19 +13256,19 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie int __pyx_clineno = 0; __Pyx_RefNannySetupContext("assign_item_from_object", 0); - /* "View.MemoryView":504 + /* "View.MemoryView":506 * """Only used if instantiated manually by the user, or if Cython doesn't * know how to convert the type""" * import struct # <<<<<<<<<<<<<< * cdef char c * cdef bytes bytesvalue */ - __pyx_t_1 = __Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 504, __pyx_L1_error) + __pyx_t_1 = 
__Pyx_Import(__pyx_n_s_struct, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 506, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_struct = __pyx_t_1; __pyx_t_1 = 0; - /* "View.MemoryView":509 + /* "View.MemoryView":511 * cdef Py_ssize_t i * * if isinstance(value, tuple): # <<<<<<<<<<<<<< @@ -13216,37 +13279,37 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __pyx_t_3 = (__pyx_t_2 != 0); if (__pyx_t_3) { - /* "View.MemoryView":510 + /* "View.MemoryView":512 * * if isinstance(value, tuple): * bytesvalue = struct.pack(self.view.format, *value) # <<<<<<<<<<<<<< * else: * bytesvalue = struct.pack(self.view.format, value) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PySequence_Tuple(__pyx_v_value); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_4 = __Pyx_PySequence_Tuple(__pyx_v_value); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = PyNumber_Add(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_6 = PyNumber_Add(__pyx_t_5, __pyx_t_4); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 512, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 510, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 510, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error) __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":509 + /* "View.MemoryView":511 * cdef Py_ssize_t i * * if isinstance(value, tuple): # <<<<<<<<<<<<<< @@ -13256,7 +13319,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie goto __pyx_L3; } - /* "View.MemoryView":512 + /* "View.MemoryView":514 * bytesvalue = struct.pack(self.view.format, *value) * else: * bytesvalue = struct.pack(self.view.format, value) # <<<<<<<<<<<<<< @@ -13264,9 +13327,9 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie * for i, c in enumerate(bytesvalue): */ /*else*/ { - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_struct, __pyx_n_s_pack); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_1 = 
__Pyx_PyBytes_FromString(__pyx_v_self->view.format); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_5 = NULL; __pyx_t_7 = 0; @@ -13283,7 +13346,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_6)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_1, __pyx_v_value}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -13292,14 +13355,14 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_t_1, __pyx_v_value}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } else #endif { - __pyx_t_8 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_8 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_8); if (__pyx_t_5) { __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL; @@ -13310,18 +13373,18 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __Pyx_GIVEREF(__pyx_v_value); PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_7, __pyx_v_value); __pyx_t_1 
= 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_8, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 512, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_8, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 514, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 512, __pyx_L1_error) + if (!(likely(PyBytes_CheckExact(__pyx_t_4))||((__pyx_t_4) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_t_4)->tp_name), 0))) __PYX_ERR(1, 514, __pyx_L1_error) __pyx_v_bytesvalue = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; } __pyx_L3:; - /* "View.MemoryView":514 + /* "View.MemoryView":516 * bytesvalue = struct.pack(self.view.format, value) * * for i, c in enumerate(bytesvalue): # <<<<<<<<<<<<<< @@ -13331,7 +13394,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __pyx_t_9 = 0; if (unlikely(__pyx_v_bytesvalue == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' is not iterable"); - __PYX_ERR(1, 514, __pyx_L1_error) + __PYX_ERR(1, 516, __pyx_L1_error) } __Pyx_INCREF(__pyx_v_bytesvalue); __pyx_t_10 = __pyx_v_bytesvalue; @@ -13341,7 +13404,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie __pyx_t_11 = __pyx_t_14; __pyx_v_c = (__pyx_t_11[0]); - /* "View.MemoryView":515 + /* "View.MemoryView":517 * * for i, c in enumerate(bytesvalue): * itemp[i] = c # <<<<<<<<<<<<<< @@ -13350,7 +13413,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie */ __pyx_v_i = __pyx_t_9; - /* "View.MemoryView":514 + /* "View.MemoryView":516 * bytesvalue = struct.pack(self.view.format, value) * * for i, c in enumerate(bytesvalue): # <<<<<<<<<<<<<< @@ -13359,7 +13422,7 @@ static 
PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie */ __pyx_t_9 = (__pyx_t_9 + 1); - /* "View.MemoryView":515 + /* "View.MemoryView":517 * * for i, c in enumerate(bytesvalue): * itemp[i] = c # <<<<<<<<<<<<<< @@ -13370,7 +13433,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie } __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "View.MemoryView":501 + /* "View.MemoryView":503 * return result * * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< @@ -13398,7 +13461,7 @@ static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryvie return __pyx_r; } -/* "View.MemoryView":518 +/* "View.MemoryView":520 * * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): # <<<<<<<<<<<<<< @@ -13441,7 +13504,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); __Pyx_GIVEREF(__pyx_v_info->obj); - /* "View.MemoryView":519 + /* "View.MemoryView":521 * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): * if flags & PyBUF_WRITABLE and self.view.readonly: # <<<<<<<<<<<<<< @@ -13459,20 +13522,20 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_L4_bool_binop_done:; if (unlikely(__pyx_t_1)) { - /* "View.MemoryView":520 + /* "View.MemoryView":522 * def __getbuffer__(self, Py_buffer *info, int flags): * if flags & PyBUF_WRITABLE and self.view.readonly: * raise ValueError("Cannot create writable memory view from read-only memoryview") # <<<<<<<<<<<<<< * * if flags & PyBUF_ND: */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__25, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 520, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__25, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); 
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 520, __pyx_L1_error) + __PYX_ERR(1, 522, __pyx_L1_error) - /* "View.MemoryView":519 + /* "View.MemoryView":521 * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): * if flags & PyBUF_WRITABLE and self.view.readonly: # <<<<<<<<<<<<<< @@ -13481,7 +13544,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu */ } - /* "View.MemoryView":522 + /* "View.MemoryView":524 * raise ValueError("Cannot create writable memory view from read-only memoryview") * * if flags & PyBUF_ND: # <<<<<<<<<<<<<< @@ -13491,7 +13554,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_1 = ((__pyx_v_flags & PyBUF_ND) != 0); if (__pyx_t_1) { - /* "View.MemoryView":523 + /* "View.MemoryView":525 * * if flags & PyBUF_ND: * info.shape = self.view.shape # <<<<<<<<<<<<<< @@ -13501,7 +13564,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_4 = __pyx_v_self->view.shape; __pyx_v_info->shape = __pyx_t_4; - /* "View.MemoryView":522 + /* "View.MemoryView":524 * raise ValueError("Cannot create writable memory view from read-only memoryview") * * if flags & PyBUF_ND: # <<<<<<<<<<<<<< @@ -13511,7 +13574,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu goto __pyx_L6; } - /* "View.MemoryView":525 + /* "View.MemoryView":527 * info.shape = self.view.shape * else: * info.shape = NULL # <<<<<<<<<<<<<< @@ -13523,7 +13586,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu } __pyx_L6:; - /* "View.MemoryView":527 + /* "View.MemoryView":529 * info.shape = NULL * * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< @@ -13533,7 +13596,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_1 = ((__pyx_v_flags & PyBUF_STRIDES) != 0); if (__pyx_t_1) { - /* "View.MemoryView":528 + /* "View.MemoryView":530 * * if flags & 
PyBUF_STRIDES: * info.strides = self.view.strides # <<<<<<<<<<<<<< @@ -13543,7 +13606,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_4 = __pyx_v_self->view.strides; __pyx_v_info->strides = __pyx_t_4; - /* "View.MemoryView":527 + /* "View.MemoryView":529 * info.shape = NULL * * if flags & PyBUF_STRIDES: # <<<<<<<<<<<<<< @@ -13553,7 +13616,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu goto __pyx_L7; } - /* "View.MemoryView":530 + /* "View.MemoryView":532 * info.strides = self.view.strides * else: * info.strides = NULL # <<<<<<<<<<<<<< @@ -13565,7 +13628,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu } __pyx_L7:; - /* "View.MemoryView":532 + /* "View.MemoryView":534 * info.strides = NULL * * if flags & PyBUF_INDIRECT: # <<<<<<<<<<<<<< @@ -13575,7 +13638,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_1 = ((__pyx_v_flags & PyBUF_INDIRECT) != 0); if (__pyx_t_1) { - /* "View.MemoryView":533 + /* "View.MemoryView":535 * * if flags & PyBUF_INDIRECT: * info.suboffsets = self.view.suboffsets # <<<<<<<<<<<<<< @@ -13585,7 +13648,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_4 = __pyx_v_self->view.suboffsets; __pyx_v_info->suboffsets = __pyx_t_4; - /* "View.MemoryView":532 + /* "View.MemoryView":534 * info.strides = NULL * * if flags & PyBUF_INDIRECT: # <<<<<<<<<<<<<< @@ -13595,7 +13658,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu goto __pyx_L8; } - /* "View.MemoryView":535 + /* "View.MemoryView":537 * info.suboffsets = self.view.suboffsets * else: * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -13607,7 +13670,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu } __pyx_L8:; - /* "View.MemoryView":537 + /* "View.MemoryView":539 * info.suboffsets = NULL * * if flags & PyBUF_FORMAT: # 
<<<<<<<<<<<<<< @@ -13617,7 +13680,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_1 = ((__pyx_v_flags & PyBUF_FORMAT) != 0); if (__pyx_t_1) { - /* "View.MemoryView":538 + /* "View.MemoryView":540 * * if flags & PyBUF_FORMAT: * info.format = self.view.format # <<<<<<<<<<<<<< @@ -13627,7 +13690,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_5 = __pyx_v_self->view.format; __pyx_v_info->format = __pyx_t_5; - /* "View.MemoryView":537 + /* "View.MemoryView":539 * info.suboffsets = NULL * * if flags & PyBUF_FORMAT: # <<<<<<<<<<<<<< @@ -13637,7 +13700,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu goto __pyx_L9; } - /* "View.MemoryView":540 + /* "View.MemoryView":542 * info.format = self.view.format * else: * info.format = NULL # <<<<<<<<<<<<<< @@ -13649,7 +13712,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu } __pyx_L9:; - /* "View.MemoryView":542 + /* "View.MemoryView":544 * info.format = NULL * * info.buf = self.view.buf # <<<<<<<<<<<<<< @@ -13659,7 +13722,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_6 = __pyx_v_self->view.buf; __pyx_v_info->buf = __pyx_t_6; - /* "View.MemoryView":543 + /* "View.MemoryView":545 * * info.buf = self.view.buf * info.ndim = self.view.ndim # <<<<<<<<<<<<<< @@ -13669,7 +13732,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_7 = __pyx_v_self->view.ndim; __pyx_v_info->ndim = __pyx_t_7; - /* "View.MemoryView":544 + /* "View.MemoryView":546 * info.buf = self.view.buf * info.ndim = self.view.ndim * info.itemsize = self.view.itemsize # <<<<<<<<<<<<<< @@ -13679,7 +13742,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_8 = __pyx_v_self->view.itemsize; __pyx_v_info->itemsize = __pyx_t_8; - /* "View.MemoryView":545 + /* "View.MemoryView":547 * 
info.ndim = self.view.ndim * info.itemsize = self.view.itemsize * info.len = self.view.len # <<<<<<<<<<<<<< @@ -13689,7 +13752,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_8 = __pyx_v_self->view.len; __pyx_v_info->len = __pyx_t_8; - /* "View.MemoryView":546 + /* "View.MemoryView":548 * info.itemsize = self.view.itemsize * info.len = self.view.len * info.readonly = self.view.readonly # <<<<<<<<<<<<<< @@ -13699,7 +13762,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __pyx_t_1 = __pyx_v_self->view.readonly; __pyx_v_info->readonly = __pyx_t_1; - /* "View.MemoryView":547 + /* "View.MemoryView":549 * info.len = self.view.len * info.readonly = self.view.readonly * info.obj = self # <<<<<<<<<<<<<< @@ -13712,7 +13775,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = ((PyObject *)__pyx_v_self); - /* "View.MemoryView":518 + /* "View.MemoryView":520 * * @cname('getbuffer') * def __getbuffer__(self, Py_buffer *info, int flags): # <<<<<<<<<<<<<< @@ -13742,7 +13805,7 @@ static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbu return __pyx_r; } -/* "View.MemoryView":553 +/* "View.MemoryView":555 * * @property * def T(self): # <<<<<<<<<<<<<< @@ -13774,29 +13837,29 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct _ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":554 + /* "View.MemoryView":556 * @property * def T(self): * cdef _memoryviewslice result = memoryview_copy(self) # <<<<<<<<<<<<<< * transpose_memslice(&result.from_slice) * return result */ - __pyx_t_1 = __pyx_memoryview_copy_object(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 554, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_copy_object(__pyx_v_self); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 556, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if 
(!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_memoryviewslice_type))))) __PYX_ERR(1, 554, __pyx_L1_error) + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_memoryviewslice_type))))) __PYX_ERR(1, 556, __pyx_L1_error) __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_1); __pyx_t_1 = 0; - /* "View.MemoryView":555 + /* "View.MemoryView":557 * def T(self): * cdef _memoryviewslice result = memoryview_copy(self) * transpose_memslice(&result.from_slice) # <<<<<<<<<<<<<< * return result * */ - __pyx_t_2 = __pyx_memslice_transpose((&__pyx_v_result->from_slice)); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(1, 555, __pyx_L1_error) + __pyx_t_2 = __pyx_memslice_transpose((&__pyx_v_result->from_slice)); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(1, 557, __pyx_L1_error) - /* "View.MemoryView":556 + /* "View.MemoryView":558 * cdef _memoryviewslice result = memoryview_copy(self) * transpose_memslice(&result.from_slice) * return result # <<<<<<<<<<<<<< @@ -13808,7 +13871,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct _ __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "View.MemoryView":553 + /* "View.MemoryView":555 * * @property * def T(self): # <<<<<<<<<<<<<< @@ -13828,7 +13891,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct _ return __pyx_r; } -/* "View.MemoryView":559 +/* "View.MemoryView":561 * * @property * def base(self): # <<<<<<<<<<<<<< @@ -13854,7 +13917,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struc __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":560 + /* "View.MemoryView":562 * @property * def base(self): * return self.obj # <<<<<<<<<<<<<< @@ -13866,7 +13929,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struc __pyx_r = __pyx_v_self->obj; goto __pyx_L0; - /* "View.MemoryView":559 + /* 
"View.MemoryView":561 * * @property * def base(self): # <<<<<<<<<<<<<< @@ -13881,7 +13944,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struc return __pyx_r; } -/* "View.MemoryView":563 +/* "View.MemoryView":565 * * @property * def shape(self): # <<<<<<<<<<<<<< @@ -13916,7 +13979,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(stru int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":564 + /* "View.MemoryView":566 * @property * def shape(self): * return tuple([length for length in self.view.shape[:self.view.ndim]]) # <<<<<<<<<<<<<< @@ -13924,25 +13987,25 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(stru * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 564, __pyx_L1_error) + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 566, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); for (__pyx_t_4 = __pyx_v_self->view.shape; __pyx_t_4 < __pyx_t_3; __pyx_t_4++) { __pyx_t_2 = __pyx_t_4; __pyx_v_length = (__pyx_t_2[0]); - __pyx_t_5 = PyInt_FromSsize_t(__pyx_v_length); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 564, __pyx_L1_error) + __pyx_t_5 = PyInt_FromSsize_t(__pyx_v_length); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 566, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(1, 564, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_1, (PyObject*)__pyx_t_5))) __PYX_ERR(1, 566, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } - __pyx_t_5 = PyList_AsTuple(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 564, __pyx_L1_error) + __pyx_t_5 = PyList_AsTuple(((PyObject*)__pyx_t_1)); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 566, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_r = 
__pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "View.MemoryView":563 + /* "View.MemoryView":565 * * @property * def shape(self): # <<<<<<<<<<<<<< @@ -13962,7 +14025,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(stru return __pyx_r; } -/* "View.MemoryView":567 +/* "View.MemoryView":569 * * @property * def strides(self): # <<<<<<<<<<<<<< @@ -13998,7 +14061,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(st int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":568 + /* "View.MemoryView":570 * @property * def strides(self): * if self.view.strides == NULL: # <<<<<<<<<<<<<< @@ -14008,20 +14071,20 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(st __pyx_t_1 = ((__pyx_v_self->view.strides == NULL) != 0); if (unlikely(__pyx_t_1)) { - /* "View.MemoryView":570 + /* "View.MemoryView":572 * if self.view.strides == NULL: * * raise ValueError("Buffer view does not expose strides") # <<<<<<<<<<<<<< * * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) */ - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__26, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 570, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__26, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 572, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(1, 570, __pyx_L1_error) + __PYX_ERR(1, 572, __pyx_L1_error) - /* "View.MemoryView":568 + /* "View.MemoryView":570 * @property * def strides(self): * if self.view.strides == NULL: # <<<<<<<<<<<<<< @@ -14030,7 +14093,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(st */ } - /* "View.MemoryView":572 + /* "View.MemoryView":574 * raise ValueError("Buffer view does not expose strides") * * return tuple([stride for stride in 
self.view.strides[:self.view.ndim]]) # <<<<<<<<<<<<<< @@ -14038,25 +14101,25 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(st * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 572, __pyx_L1_error) + __pyx_t_2 = PyList_New(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 574, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_4 = (__pyx_v_self->view.strides + __pyx_v_self->view.ndim); for (__pyx_t_5 = __pyx_v_self->view.strides; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) { __pyx_t_3 = __pyx_t_5; __pyx_v_stride = (__pyx_t_3[0]); - __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_stride); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 572, __pyx_L1_error) + __pyx_t_6 = PyInt_FromSsize_t(__pyx_v_stride); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 574, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_2, (PyObject*)__pyx_t_6))) __PYX_ERR(1, 572, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_2, (PyObject*)__pyx_t_6))) __PYX_ERR(1, 574, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } - __pyx_t_6 = PyList_AsTuple(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 572, __pyx_L1_error) + __pyx_t_6 = PyList_AsTuple(((PyObject*)__pyx_t_2)); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 574, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_6; __pyx_t_6 = 0; goto __pyx_L0; - /* "View.MemoryView":567 + /* "View.MemoryView":569 * * @property * def strides(self): # <<<<<<<<<<<<<< @@ -14076,7 +14139,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(st return __pyx_r; } -/* "View.MemoryView":575 +/* "View.MemoryView":577 * * @property * def suboffsets(self): # <<<<<<<<<<<<<< @@ -14112,7 +14175,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":576 + /* 
"View.MemoryView":578 * @property * def suboffsets(self): * if self.view.suboffsets == NULL: # <<<<<<<<<<<<<< @@ -14122,7 +14185,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ __pyx_t_1 = ((__pyx_v_self->view.suboffsets == NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":577 + /* "View.MemoryView":579 * def suboffsets(self): * if self.view.suboffsets == NULL: * return (-1,) * self.view.ndim # <<<<<<<<<<<<<< @@ -14130,16 +14193,16 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 577, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 579, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_Multiply(__pyx_tuple__27, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 577, __pyx_L1_error) + __pyx_t_3 = PyNumber_Multiply(__pyx_tuple__27, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 579, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_3; __pyx_t_3 = 0; goto __pyx_L0; - /* "View.MemoryView":576 + /* "View.MemoryView":578 * @property * def suboffsets(self): * if self.view.suboffsets == NULL: # <<<<<<<<<<<<<< @@ -14148,7 +14211,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ */ } - /* "View.MemoryView":579 + /* "View.MemoryView":581 * return (-1,) * self.view.ndim * * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) # <<<<<<<<<<<<<< @@ -14156,25 +14219,25 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 579, __pyx_L1_error) + __pyx_t_3 = PyList_New(0); 
if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 581, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_5 = (__pyx_v_self->view.suboffsets + __pyx_v_self->view.ndim); for (__pyx_t_6 = __pyx_v_self->view.suboffsets; __pyx_t_6 < __pyx_t_5; __pyx_t_6++) { __pyx_t_4 = __pyx_t_6; __pyx_v_suboffset = (__pyx_t_4[0]); - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_suboffset); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 579, __pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_suboffset); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 581, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - if (unlikely(__Pyx_ListComp_Append(__pyx_t_3, (PyObject*)__pyx_t_2))) __PYX_ERR(1, 579, __pyx_L1_error) + if (unlikely(__Pyx_ListComp_Append(__pyx_t_3, (PyObject*)__pyx_t_2))) __PYX_ERR(1, 581, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } - __pyx_t_2 = PyList_AsTuple(((PyObject*)__pyx_t_3)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 579, __pyx_L1_error) + __pyx_t_2 = PyList_AsTuple(((PyObject*)__pyx_t_3)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 581, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":575 + /* "View.MemoryView":577 * * @property * def suboffsets(self): # <<<<<<<<<<<<<< @@ -14194,7 +14257,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get_ return __pyx_r; } -/* "View.MemoryView":582 +/* "View.MemoryView":584 * * @property * def ndim(self): # <<<<<<<<<<<<<< @@ -14224,7 +14287,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":583 + /* "View.MemoryView":585 * @property * def ndim(self): * return self.view.ndim # <<<<<<<<<<<<<< @@ -14232,13 +14295,13 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struc * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(1, 583, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_self->view.ndim); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 585, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "View.MemoryView":582 + /* "View.MemoryView":584 * * @property * def ndim(self): # <<<<<<<<<<<<<< @@ -14257,7 +14320,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struc return __pyx_r; } -/* "View.MemoryView":586 +/* "View.MemoryView":588 * * @property * def itemsize(self): # <<<<<<<<<<<<<< @@ -14287,7 +14350,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(s int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":587 + /* "View.MemoryView":589 * @property * def itemsize(self): * return self.view.itemsize # <<<<<<<<<<<<<< @@ -14295,13 +14358,13 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(s * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 587, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 589, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "View.MemoryView":586 + /* "View.MemoryView":588 * * @property * def itemsize(self): # <<<<<<<<<<<<<< @@ -14320,7 +14383,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(s return __pyx_r; } -/* "View.MemoryView":590 +/* "View.MemoryView":592 * * @property * def nbytes(self): # <<<<<<<<<<<<<< @@ -14352,7 +14415,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(str int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":591 + /* "View.MemoryView":593 * @property * def nbytes(self): * return self.size * self.view.itemsize # <<<<<<<<<<<<<< 
@@ -14360,11 +14423,11 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(str * @property */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 591, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 593, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 591, __pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_self->view.itemsize); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 593, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_Multiply(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 591, __pyx_L1_error) + __pyx_t_3 = PyNumber_Multiply(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 593, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -14372,7 +14435,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(str __pyx_t_3 = 0; goto __pyx_L0; - /* "View.MemoryView":590 + /* "View.MemoryView":592 * * @property * def nbytes(self): # <<<<<<<<<<<<<< @@ -14393,7 +14456,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(str return __pyx_r; } -/* "View.MemoryView":594 +/* "View.MemoryView":596 * * @property * def size(self): # <<<<<<<<<<<<<< @@ -14430,7 +14493,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":595 + /* "View.MemoryView":597 * @property * def size(self): * if self._size is None: # <<<<<<<<<<<<<< @@ -14441,7 +14504,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* 
"View.MemoryView":596 + /* "View.MemoryView":598 * def size(self): * if self._size is None: * result = 1 # <<<<<<<<<<<<<< @@ -14451,7 +14514,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc __Pyx_INCREF(__pyx_int_1); __pyx_v_result = __pyx_int_1; - /* "View.MemoryView":598 + /* "View.MemoryView":600 * result = 1 * * for length in self.view.shape[:self.view.ndim]: # <<<<<<<<<<<<<< @@ -14461,25 +14524,25 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc __pyx_t_4 = (__pyx_v_self->view.shape + __pyx_v_self->view.ndim); for (__pyx_t_5 = __pyx_v_self->view.shape; __pyx_t_5 < __pyx_t_4; __pyx_t_5++) { __pyx_t_3 = __pyx_t_5; - __pyx_t_6 = PyInt_FromSsize_t((__pyx_t_3[0])); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 598, __pyx_L1_error) + __pyx_t_6 = PyInt_FromSsize_t((__pyx_t_3[0])); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 600, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_6); __pyx_t_6 = 0; - /* "View.MemoryView":599 + /* "View.MemoryView":601 * * for length in self.view.shape[:self.view.ndim]: * result *= length # <<<<<<<<<<<<<< * * self._size = result */ - __pyx_t_6 = PyNumber_InPlaceMultiply(__pyx_v_result, __pyx_v_length); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 599, __pyx_L1_error) + __pyx_t_6 = PyNumber_InPlaceMultiply(__pyx_v_result, __pyx_v_length); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 601, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF_SET(__pyx_v_result, __pyx_t_6); __pyx_t_6 = 0; } - /* "View.MemoryView":601 + /* "View.MemoryView":603 * result *= length * * self._size = result # <<<<<<<<<<<<<< @@ -14492,7 +14555,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc __Pyx_DECREF(__pyx_v_self->_size); __pyx_v_self->_size = __pyx_v_result; - /* "View.MemoryView":595 + /* "View.MemoryView":597 * @property * def size(self): * if self._size is None: # <<<<<<<<<<<<<< @@ -14501,7 +14564,7 @@ static PyObject 
*__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc */ } - /* "View.MemoryView":603 + /* "View.MemoryView":605 * self._size = result * * return self._size # <<<<<<<<<<<<<< @@ -14513,7 +14576,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc __pyx_r = __pyx_v_self->_size; goto __pyx_L0; - /* "View.MemoryView":594 + /* "View.MemoryView":596 * * @property * def size(self): # <<<<<<<<<<<<<< @@ -14534,7 +14597,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struc return __pyx_r; } -/* "View.MemoryView":605 +/* "View.MemoryView":607 * return self._size * * def __len__(self): # <<<<<<<<<<<<<< @@ -14561,7 +14624,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 int __pyx_t_1; __Pyx_RefNannySetupContext("__len__", 0); - /* "View.MemoryView":606 + /* "View.MemoryView":608 * * def __len__(self): * if self.view.ndim >= 1: # <<<<<<<<<<<<<< @@ -14571,7 +14634,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 __pyx_t_1 = ((__pyx_v_self->view.ndim >= 1) != 0); if (__pyx_t_1) { - /* "View.MemoryView":607 + /* "View.MemoryView":609 * def __len__(self): * if self.view.ndim >= 1: * return self.view.shape[0] # <<<<<<<<<<<<<< @@ -14581,7 +14644,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 __pyx_r = (__pyx_v_self->view.shape[0]); goto __pyx_L0; - /* "View.MemoryView":606 + /* "View.MemoryView":608 * * def __len__(self): * if self.view.ndim >= 1: # <<<<<<<<<<<<<< @@ -14590,7 +14653,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 */ } - /* "View.MemoryView":609 + /* "View.MemoryView":611 * return self.view.shape[0] * * return 0 # <<<<<<<<<<<<<< @@ -14600,7 +14663,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 __pyx_r = 0; goto __pyx_L0; - /* "View.MemoryView":605 + /* "View.MemoryView":607 * return self._size * * def 
__len__(self): # <<<<<<<<<<<<<< @@ -14614,7 +14677,7 @@ static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_1 return __pyx_r; } -/* "View.MemoryView":611 +/* "View.MemoryView":613 * return 0 * * def __repr__(self): # <<<<<<<<<<<<<< @@ -14646,7 +14709,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__repr__", 0); - /* "View.MemoryView":612 + /* "View.MemoryView":614 * * def __repr__(self): * return "" % (self.base.__class__.__name__, # <<<<<<<<<<<<<< @@ -14654,33 +14717,33 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12 * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 612, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 614, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 612, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 614, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 612, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 614, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":613 + /* "View.MemoryView":615 * def __repr__(self): * return "" % (self.base.__class__.__name__, * id(self)) # <<<<<<<<<<<<<< * * def __str__(self): */ - __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 613, __pyx_L1_error) + 
__pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_id, ((PyObject *)__pyx_v_self)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 615, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - /* "View.MemoryView":612 + /* "View.MemoryView":614 * * def __repr__(self): * return "" % (self.base.__class__.__name__, # <<<<<<<<<<<<<< * id(self)) * */ - __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 612, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 614, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); @@ -14688,14 +14751,14 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12 PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2); __pyx_t_1 = 0; __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 612, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_at_0x_x, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 614, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":611 + /* "View.MemoryView":613 * return 0 * * def __repr__(self): # <<<<<<<<<<<<<< @@ -14716,7 +14779,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12 return __pyx_r; } -/* "View.MemoryView":615 +/* "View.MemoryView":617 * id(self)) * * def __str__(self): # <<<<<<<<<<<<<< @@ -14747,7 +14810,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__str__", 0); - /* "View.MemoryView":616 + /* "View.MemoryView":618 * * def __str__(self): * return "" % (self.base.__class__.__name__,) # <<<<<<<<<<<<<< @@ -14755,27 +14818,27 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14 * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 616, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_self), __pyx_n_s_base); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 618, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 616, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_class); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 618, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 616, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_name_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 618, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 616, __pyx_L1_error) + __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 618, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_object, __pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 616, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_MemoryView_of_r_object, __pyx_t_2); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 618, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "View.MemoryView":615 + /* "View.MemoryView":617 * id(self)) * * def __str__(self): # <<<<<<<<<<<<<< @@ -14795,7 +14858,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14 return __pyx_r; } -/* "View.MemoryView":619 +/* "View.MemoryView":621 * * * def is_c_contig(self): # <<<<<<<<<<<<<< @@ -14828,17 +14891,17 @@ static 
PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("is_c_contig", 0); - /* "View.MemoryView":622 + /* "View.MemoryView":624 * cdef __Pyx_memviewslice *mslice * cdef __Pyx_memviewslice tmp * mslice = get_slice_from_memview(self, &tmp) # <<<<<<<<<<<<<< * return slice_is_contig(mslice[0], 'C', self.view.ndim) * */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 622, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 624, __pyx_L1_error) __pyx_v_mslice = __pyx_t_1; - /* "View.MemoryView":623 + /* "View.MemoryView":625 * cdef __Pyx_memviewslice tmp * mslice = get_slice_from_memview(self, &tmp) * return slice_is_contig(mslice[0], 'C', self.view.ndim) # <<<<<<<<<<<<<< @@ -14846,13 +14909,13 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16 * def is_f_contig(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'C', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 623, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'C', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 625, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":619 + /* "View.MemoryView":621 * * * def is_c_contig(self): # <<<<<<<<<<<<<< @@ -14871,7 +14934,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16 return __pyx_r; } -/* "View.MemoryView":625 +/* "View.MemoryView":627 * return slice_is_contig(mslice[0], 'C', self.view.ndim) * * def is_f_contig(self): # <<<<<<<<<<<<<< @@ -14904,17 +14967,17 @@ static PyObject 
*__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("is_f_contig", 0); - /* "View.MemoryView":628 + /* "View.MemoryView":630 * cdef __Pyx_memviewslice *mslice * cdef __Pyx_memviewslice tmp * mslice = get_slice_from_memview(self, &tmp) # <<<<<<<<<<<<<< * return slice_is_contig(mslice[0], 'F', self.view.ndim) * */ - __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 628, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_get_slice_from_memoryview(__pyx_v_self, (&__pyx_v_tmp)); if (unlikely(__pyx_t_1 == ((__Pyx_memviewslice *)NULL))) __PYX_ERR(1, 630, __pyx_L1_error) __pyx_v_mslice = __pyx_t_1; - /* "View.MemoryView":629 + /* "View.MemoryView":631 * cdef __Pyx_memviewslice tmp * mslice = get_slice_from_memview(self, &tmp) * return slice_is_contig(mslice[0], 'F', self.view.ndim) # <<<<<<<<<<<<<< @@ -14922,13 +14985,13 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18 * def copy(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'F', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 629, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_memviewslice_is_contig((__pyx_v_mslice[0]), 'F', __pyx_v_self->view.ndim)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 631, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":625 + /* "View.MemoryView":627 * return slice_is_contig(mslice[0], 'C', self.view.ndim) * * def is_f_contig(self): # <<<<<<<<<<<<<< @@ -14947,7 +15010,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18 return __pyx_r; } -/* "View.MemoryView":631 +/* "View.MemoryView":633 * return slice_is_contig(mslice[0], 'F', self.view.ndim) * * def copy(self): # <<<<<<<<<<<<<< @@ -14980,7 +15043,7 @@ 
static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("copy", 0); - /* "View.MemoryView":633 + /* "View.MemoryView":635 * def copy(self): * cdef __Pyx_memviewslice mslice * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS # <<<<<<<<<<<<<< @@ -14989,7 +15052,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20 */ __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_F_CONTIGUOUS)); - /* "View.MemoryView":635 + /* "View.MemoryView":637 * cdef int flags = self.flags & ~PyBUF_F_CONTIGUOUS * * slice_copy(self, &mslice) # <<<<<<<<<<<<<< @@ -14998,17 +15061,17 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20 */ __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_mslice)); - /* "View.MemoryView":636 + /* "View.MemoryView":638 * * slice_copy(self, &mslice) * mslice = slice_copy_contig(&mslice, "c", self.view.ndim, # <<<<<<<<<<<<<< * self.view.itemsize, * flags|PyBUF_C_CONTIGUOUS, */ - __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_mslice), ((char *)"c"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_C_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 636, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_mslice), ((char *)"c"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_C_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 638, __pyx_L1_error) __pyx_v_mslice = __pyx_t_1; - /* "View.MemoryView":641 + /* "View.MemoryView":643 * self.dtype_is_object) * * return memoryview_copy_from_slice(self, &mslice) # <<<<<<<<<<<<<< @@ -15016,13 +15079,13 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20 * def copy_fortran(self): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_mslice)); if 
(unlikely(!__pyx_t_2)) __PYX_ERR(1, 641, __pyx_L1_error) + __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_mslice)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 643, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":631 + /* "View.MemoryView":633 * return slice_is_contig(mslice[0], 'F', self.view.ndim) * * def copy(self): # <<<<<<<<<<<<<< @@ -15041,7 +15104,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20 return __pyx_r; } -/* "View.MemoryView":643 +/* "View.MemoryView":645 * return memoryview_copy_from_slice(self, &mslice) * * def copy_fortran(self): # <<<<<<<<<<<<<< @@ -15075,7 +15138,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22 int __pyx_clineno = 0; __Pyx_RefNannySetupContext("copy_fortran", 0); - /* "View.MemoryView":645 + /* "View.MemoryView":647 * def copy_fortran(self): * cdef __Pyx_memviewslice src, dst * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS # <<<<<<<<<<<<<< @@ -15084,7 +15147,7 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22 */ __pyx_v_flags = (__pyx_v_self->flags & (~PyBUF_C_CONTIGUOUS)); - /* "View.MemoryView":647 + /* "View.MemoryView":649 * cdef int flags = self.flags & ~PyBUF_C_CONTIGUOUS * * slice_copy(self, &src) # <<<<<<<<<<<<<< @@ -15093,17 +15156,17 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22 */ __pyx_memoryview_slice_copy(__pyx_v_self, (&__pyx_v_src)); - /* "View.MemoryView":648 + /* "View.MemoryView":650 * * slice_copy(self, &src) * dst = slice_copy_contig(&src, "fortran", self.view.ndim, # <<<<<<<<<<<<<< * self.view.itemsize, * flags|PyBUF_F_CONTIGUOUS, */ - __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_src), ((char *)"fortran"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_F_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) 
__PYX_ERR(1, 648, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_copy_new_contig((&__pyx_v_src), ((char *)"fortran"), __pyx_v_self->view.ndim, __pyx_v_self->view.itemsize, (__pyx_v_flags | PyBUF_F_CONTIGUOUS), __pyx_v_self->dtype_is_object); if (unlikely(PyErr_Occurred())) __PYX_ERR(1, 650, __pyx_L1_error) __pyx_v_dst = __pyx_t_1; - /* "View.MemoryView":653 + /* "View.MemoryView":655 * self.dtype_is_object) * * return memoryview_copy_from_slice(self, &dst) # <<<<<<<<<<<<<< @@ -15111,13 +15174,13 @@ static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22 * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_dst)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 653, __pyx_L1_error) + __pyx_t_2 = __pyx_memoryview_copy_object_from_slice(__pyx_v_self, (&__pyx_v_dst)); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 655, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":643 + /* "View.MemoryView":645 * return memoryview_copy_from_slice(self, &mslice) * * def copy_fortran(self): # <<<<<<<<<<<<<< @@ -15249,7 +15312,7 @@ static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED st return __pyx_r; } -/* "View.MemoryView":657 +/* "View.MemoryView":659 * * @cname('__pyx_memoryview_new') * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): # <<<<<<<<<<<<<< @@ -15269,18 +15332,18 @@ static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, in int __pyx_clineno = 0; __Pyx_RefNannySetupContext("memoryview_cwrapper", 0); - /* "View.MemoryView":658 + /* "View.MemoryView":660 * @cname('__pyx_memoryview_new') * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): * cdef memoryview result = memoryview(o, flags, dtype_is_object) # <<<<<<<<<<<<<< * result.typeinfo = typeinfo * return result */ - __pyx_t_1 = 
__Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 658, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_flags); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 658, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 658, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_v_o); __Pyx_GIVEREF(__pyx_v_o); @@ -15291,13 +15354,13 @@ static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, in PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2); __pyx_t_1 = 0; __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 658, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryview_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 660, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_result = ((struct __pyx_memoryview_obj *)__pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":659 + /* "View.MemoryView":661 * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): * cdef memoryview result = memoryview(o, flags, dtype_is_object) * result.typeinfo = typeinfo # <<<<<<<<<<<<<< @@ -15306,7 +15369,7 @@ static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, in */ __pyx_v_result->typeinfo = __pyx_v_typeinfo; - /* "View.MemoryView":660 + /* "View.MemoryView":662 * cdef memoryview result = memoryview(o, flags, dtype_is_object) * result.typeinfo = typeinfo * return result # <<<<<<<<<<<<<< @@ -15318,7 +15381,7 @@ 
static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, in __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "View.MemoryView":657 + /* "View.MemoryView":659 * * @cname('__pyx_memoryview_new') * cdef memoryview_cwrapper(object o, int flags, bint dtype_is_object, __Pyx_TypeInfo *typeinfo): # <<<<<<<<<<<<<< @@ -15340,7 +15403,7 @@ static PyObject *__pyx_memoryview_new(PyObject *__pyx_v_o, int __pyx_v_flags, in return __pyx_r; } -/* "View.MemoryView":663 +/* "View.MemoryView":665 * * @cname('__pyx_memoryview_check') * cdef inline bint memoryview_check(object o): # <<<<<<<<<<<<<< @@ -15354,7 +15417,7 @@ static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) { int __pyx_t_1; __Pyx_RefNannySetupContext("memoryview_check", 0); - /* "View.MemoryView":664 + /* "View.MemoryView":666 * @cname('__pyx_memoryview_check') * cdef inline bint memoryview_check(object o): * return isinstance(o, memoryview) # <<<<<<<<<<<<<< @@ -15365,7 +15428,7 @@ static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) { __pyx_r = __pyx_t_1; goto __pyx_L0; - /* "View.MemoryView":663 + /* "View.MemoryView":665 * * @cname('__pyx_memoryview_check') * cdef inline bint memoryview_check(object o): # <<<<<<<<<<<<<< @@ -15379,7 +15442,7 @@ static CYTHON_INLINE int __pyx_memoryview_check(PyObject *__pyx_v_o) { return __pyx_r; } -/* "View.MemoryView":666 +/* "View.MemoryView":668 * return isinstance(o, memoryview) * * cdef tuple _unellipsify(object index, int ndim): # <<<<<<<<<<<<<< @@ -15413,7 +15476,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_unellipsify", 0); - /* "View.MemoryView":671 + /* "View.MemoryView":673 * full slices. 
* """ * if not isinstance(index, tuple): # <<<<<<<<<<<<<< @@ -15424,14 +15487,14 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":672 + /* "View.MemoryView":674 * """ * if not isinstance(index, tuple): * tup = (index,) # <<<<<<<<<<<<<< * else: * tup = index */ - __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 672, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 674, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_v_index); __Pyx_GIVEREF(__pyx_v_index); @@ -15439,7 +15502,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_v_tup = __pyx_t_3; __pyx_t_3 = 0; - /* "View.MemoryView":671 + /* "View.MemoryView":673 * full slices. * """ * if not isinstance(index, tuple): # <<<<<<<<<<<<<< @@ -15449,7 +15512,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { goto __pyx_L3; } - /* "View.MemoryView":674 + /* "View.MemoryView":676 * tup = (index,) * else: * tup = index # <<<<<<<<<<<<<< @@ -15462,19 +15525,19 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { } __pyx_L3:; - /* "View.MemoryView":676 + /* "View.MemoryView":678 * tup = index * * result = [] # <<<<<<<<<<<<<< * have_slices = False * seen_ellipsis = False */ - __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 676, __pyx_L1_error) + __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 678, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_v_result = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":677 + /* "View.MemoryView":679 * * result = [] * have_slices = False # <<<<<<<<<<<<<< @@ -15483,7 +15546,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ __pyx_v_have_slices = 0; - /* "View.MemoryView":678 + /* "View.MemoryView":680 * result = [] * have_slices = False * seen_ellipsis = False 
# <<<<<<<<<<<<<< @@ -15492,7 +15555,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ __pyx_v_seen_ellipsis = 0; - /* "View.MemoryView":679 + /* "View.MemoryView":681 * have_slices = False * seen_ellipsis = False * for idx, item in enumerate(tup): # <<<<<<<<<<<<<< @@ -15505,26 +15568,26 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_4 = __pyx_v_tup; __Pyx_INCREF(__pyx_t_4); __pyx_t_5 = 0; __pyx_t_6 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_tup); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_5 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_v_tup); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 681, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_6 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 681, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_6)) { if (likely(PyList_CheckExact(__pyx_t_4))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_7 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_7 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(1, 681, __pyx_L1_error) #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 681, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(1, 
679, __pyx_L1_error) + __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(1, 681, __pyx_L1_error) #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_7 = PySequence_ITEM(__pyx_t_4, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 681, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); #endif } @@ -15534,7 +15597,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 679, __pyx_L1_error) + else __PYX_ERR(1, 681, __pyx_L1_error) } break; } @@ -15544,13 +15607,13 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_7 = 0; __Pyx_INCREF(__pyx_t_3); __Pyx_XDECREF_SET(__pyx_v_idx, __pyx_t_3); - __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_t_3, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 679, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_t_3, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 681, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = __pyx_t_7; __pyx_t_7 = 0; - /* "View.MemoryView":680 + /* "View.MemoryView":682 * seen_ellipsis = False * for idx, item in enumerate(tup): * if item is Ellipsis: # <<<<<<<<<<<<<< @@ -15561,7 +15624,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "View.MemoryView":681 + /* "View.MemoryView":683 * for idx, item in enumerate(tup): * if item is Ellipsis: * if not seen_ellipsis: # <<<<<<<<<<<<<< @@ -15571,15 +15634,15 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_1 = ((!(__pyx_v_seen_ellipsis != 0)) != 0); if (__pyx_t_1) { - /* "View.MemoryView":682 + /* 
"View.MemoryView":684 * if item is Ellipsis: * if not seen_ellipsis: * result.extend([slice(None)] * (ndim - len(tup) + 1)) # <<<<<<<<<<<<<< * seen_ellipsis = True * else: */ - __pyx_t_8 = PyObject_Length(__pyx_v_tup); if (unlikely(__pyx_t_8 == ((Py_ssize_t)-1))) __PYX_ERR(1, 682, __pyx_L1_error) - __pyx_t_7 = PyList_New(1 * ((((__pyx_v_ndim - __pyx_t_8) + 1)<0) ? 0:((__pyx_v_ndim - __pyx_t_8) + 1))); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 682, __pyx_L1_error) + __pyx_t_8 = PyObject_Length(__pyx_v_tup); if (unlikely(__pyx_t_8 == ((Py_ssize_t)-1))) __PYX_ERR(1, 684, __pyx_L1_error) + __pyx_t_7 = PyList_New(1 * ((((__pyx_v_ndim - __pyx_t_8) + 1)<0) ? 0:((__pyx_v_ndim - __pyx_t_8) + 1))); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 684, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < ((__pyx_v_ndim - __pyx_t_8) + 1); __pyx_temp++) { @@ -15588,10 +15651,10 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { PyList_SET_ITEM(__pyx_t_7, __pyx_temp, __pyx_slice__30); } } - __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_7); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 682, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_7); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 684, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "View.MemoryView":683 + /* "View.MemoryView":685 * if not seen_ellipsis: * result.extend([slice(None)] * (ndim - len(tup) + 1)) * seen_ellipsis = True # <<<<<<<<<<<<<< @@ -15600,7 +15663,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ __pyx_v_seen_ellipsis = 1; - /* "View.MemoryView":681 + /* "View.MemoryView":683 * for idx, item in enumerate(tup): * if item is Ellipsis: * if not seen_ellipsis: # <<<<<<<<<<<<<< @@ -15610,7 +15673,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { goto __pyx_L7; } - /* "View.MemoryView":685 + /* "View.MemoryView":687 * seen_ellipsis = 
True * else: * result.append(slice(None)) # <<<<<<<<<<<<<< @@ -15618,11 +15681,11 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { * else: */ /*else*/ { - __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_slice__30); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 685, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_slice__30); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 687, __pyx_L1_error) } __pyx_L7:; - /* "View.MemoryView":686 + /* "View.MemoryView":688 * else: * result.append(slice(None)) * have_slices = True # <<<<<<<<<<<<<< @@ -15631,7 +15694,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ __pyx_v_have_slices = 1; - /* "View.MemoryView":680 + /* "View.MemoryView":682 * seen_ellipsis = False * for idx, item in enumerate(tup): * if item is Ellipsis: # <<<<<<<<<<<<<< @@ -15641,7 +15704,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { goto __pyx_L6; } - /* "View.MemoryView":688 + /* "View.MemoryView":690 * have_slices = True * else: * if not isinstance(item, slice) and not PyIndex_Check(item): # <<<<<<<<<<<<<< @@ -15661,23 +15724,23 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_L9_bool_binop_done:; if (unlikely(__pyx_t_1)) { - /* "View.MemoryView":689 + /* "View.MemoryView":691 * else: * if not isinstance(item, slice) and not PyIndex_Check(item): * raise TypeError("Cannot index with type '%s'" % type(item)) # <<<<<<<<<<<<<< * * have_slices = have_slices or isinstance(item, slice) */ - __pyx_t_7 = __Pyx_PyString_FormatSafe(__pyx_kp_s_Cannot_index_with_type_s, ((PyObject *)Py_TYPE(__pyx_v_item))); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 689, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyString_FormatSafe(__pyx_kp_s_Cannot_index_with_type_s, ((PyObject *)Py_TYPE(__pyx_v_item))); if (unlikely(!__pyx_t_7)) __PYX_ERR(1, 691, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); - __pyx_t_11 = 
__Pyx_PyObject_CallOneArg(__pyx_builtin_TypeError, __pyx_t_7); if (unlikely(!__pyx_t_11)) __PYX_ERR(1, 689, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_CallOneArg(__pyx_builtin_TypeError, __pyx_t_7); if (unlikely(!__pyx_t_11)) __PYX_ERR(1, 691, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __Pyx_Raise(__pyx_t_11, 0, 0, 0); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - __PYX_ERR(1, 689, __pyx_L1_error) + __PYX_ERR(1, 691, __pyx_L1_error) - /* "View.MemoryView":688 + /* "View.MemoryView":690 * have_slices = True * else: * if not isinstance(item, slice) and not PyIndex_Check(item): # <<<<<<<<<<<<<< @@ -15686,7 +15749,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ } - /* "View.MemoryView":691 + /* "View.MemoryView":693 * raise TypeError("Cannot index with type '%s'" % type(item)) * * have_slices = have_slices or isinstance(item, slice) # <<<<<<<<<<<<<< @@ -15705,18 +15768,18 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_L11_bool_binop_done:; __pyx_v_have_slices = __pyx_t_1; - /* "View.MemoryView":692 + /* "View.MemoryView":694 * * have_slices = have_slices or isinstance(item, slice) * result.append(item) # <<<<<<<<<<<<<< * * nslices = ndim - len(result) */ - __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_v_item); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 692, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyList_Append(__pyx_v_result, __pyx_v_item); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 694, __pyx_L1_error) } __pyx_L6:; - /* "View.MemoryView":679 + /* "View.MemoryView":681 * have_slices = False * seen_ellipsis = False * for idx, item in enumerate(tup): # <<<<<<<<<<<<<< @@ -15727,17 +15790,17 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":694 + /* "View.MemoryView":696 * result.append(item) * * nslices = ndim - 
len(result) # <<<<<<<<<<<<<< * if nslices: * result.extend([slice(None)] * nslices) */ - __pyx_t_5 = PyList_GET_SIZE(__pyx_v_result); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(1, 694, __pyx_L1_error) + __pyx_t_5 = PyList_GET_SIZE(__pyx_v_result); if (unlikely(__pyx_t_5 == ((Py_ssize_t)-1))) __PYX_ERR(1, 696, __pyx_L1_error) __pyx_v_nslices = (__pyx_v_ndim - __pyx_t_5); - /* "View.MemoryView":695 + /* "View.MemoryView":697 * * nslices = ndim - len(result) * if nslices: # <<<<<<<<<<<<<< @@ -15747,14 +15810,14 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_1 = (__pyx_v_nslices != 0); if (__pyx_t_1) { - /* "View.MemoryView":696 + /* "View.MemoryView":698 * nslices = ndim - len(result) * if nslices: * result.extend([slice(None)] * nslices) # <<<<<<<<<<<<<< * * return have_slices or nslices, tuple(result) */ - __pyx_t_3 = PyList_New(1 * ((__pyx_v_nslices<0) ? 0:__pyx_v_nslices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 696, __pyx_L1_error) + __pyx_t_3 = PyList_New(1 * ((__pyx_v_nslices<0) ? 
0:__pyx_v_nslices)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 698, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < __pyx_v_nslices; __pyx_temp++) { @@ -15763,10 +15826,10 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { PyList_SET_ITEM(__pyx_t_3, __pyx_temp, __pyx_slice__30); } } - __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_3); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 696, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyList_Extend(__pyx_v_result, __pyx_t_3); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 698, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":695 + /* "View.MemoryView":697 * * nslices = ndim - len(result) * if nslices: # <<<<<<<<<<<<<< @@ -15775,7 +15838,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { */ } - /* "View.MemoryView":698 + /* "View.MemoryView":700 * result.extend([slice(None)] * nslices) * * return have_slices or nslices, tuple(result) # <<<<<<<<<<<<<< @@ -15785,20 +15848,20 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __Pyx_XDECREF(__pyx_r); if (!__pyx_v_have_slices) { } else { - __pyx_t_4 = __Pyx_PyBool_FromLong(__pyx_v_have_slices); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 698, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyBool_FromLong(__pyx_v_have_slices); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 700, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L14_bool_binop_done; } - __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_nslices); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 698, __pyx_L1_error) + __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_nslices); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 700, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_3 = __pyx_t_4; __pyx_t_4 = 0; __pyx_L14_bool_binop_done:; - __pyx_t_4 = PyList_AsTuple(__pyx_v_result); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 698, __pyx_L1_error) + __pyx_t_4 = 
PyList_AsTuple(__pyx_v_result); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 700, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) __PYX_ERR(1, 698, __pyx_L1_error) + __pyx_t_11 = PyTuple_New(2); if (unlikely(!__pyx_t_11)) __PYX_ERR(1, 700, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_3); @@ -15810,7 +15873,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { __pyx_t_11 = 0; goto __pyx_L0; - /* "View.MemoryView":666 + /* "View.MemoryView":668 * return isinstance(o, memoryview) * * cdef tuple _unellipsify(object index, int ndim): # <<<<<<<<<<<<<< @@ -15836,7 +15899,7 @@ static PyObject *_unellipsify(PyObject *__pyx_v_index, int __pyx_v_ndim) { return __pyx_r; } -/* "View.MemoryView":700 +/* "View.MemoryView":702 * return have_slices or nslices, tuple(result) * * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim): # <<<<<<<<<<<<<< @@ -15858,7 +15921,7 @@ static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("assert_direct_dimensions", 0); - /* "View.MemoryView":701 + /* "View.MemoryView":703 * * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim): * for suboffset in suboffsets[:ndim]: # <<<<<<<<<<<<<< @@ -15870,7 +15933,7 @@ static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __ __pyx_t_1 = __pyx_t_3; __pyx_v_suboffset = (__pyx_t_1[0]); - /* "View.MemoryView":702 + /* "View.MemoryView":704 * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim): * for suboffset in suboffsets[:ndim]: * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -15880,20 +15943,20 @@ static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __ __pyx_t_4 = ((__pyx_v_suboffset >= 0) != 0); if (unlikely(__pyx_t_4)) { - /* "View.MemoryView":703 + /* "View.MemoryView":705 * for suboffset in suboffsets[:ndim]: * if suboffset 
>= 0: * raise ValueError("Indirect dimensions not supported") # <<<<<<<<<<<<<< * * */ - __pyx_t_5 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__31, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 703, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__31, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 705, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_Raise(__pyx_t_5, 0, 0, 0); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __PYX_ERR(1, 703, __pyx_L1_error) + __PYX_ERR(1, 705, __pyx_L1_error) - /* "View.MemoryView":702 + /* "View.MemoryView":704 * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim): * for suboffset in suboffsets[:ndim]: * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -15903,7 +15966,7 @@ static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __ } } - /* "View.MemoryView":700 + /* "View.MemoryView":702 * return have_slices or nslices, tuple(result) * * cdef assert_direct_dimensions(Py_ssize_t *suboffsets, int ndim): # <<<<<<<<<<<<<< @@ -15924,7 +15987,7 @@ static PyObject *assert_direct_dimensions(Py_ssize_t *__pyx_v_suboffsets, int __ return __pyx_r; } -/* "View.MemoryView":710 +/* "View.MemoryView":712 * * @cname('__pyx_memview_slice') * cdef memoryview memview_slice(memoryview memview, object indices): # <<<<<<<<<<<<<< @@ -15968,7 +16031,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("memview_slice", 0); - /* "View.MemoryView":711 + /* "View.MemoryView":713 * @cname('__pyx_memview_slice') * cdef memoryview memview_slice(memoryview memview, object indices): * cdef int new_ndim = 0, suboffset_dim = -1, dim # <<<<<<<<<<<<<< @@ -15978,7 +16041,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_v_new_ndim = 0; __pyx_v_suboffset_dim = -1; - /* "View.MemoryView":718 + /* "View.MemoryView":720 * * * memset(&dst, 0, sizeof(dst)) # <<<<<<<<<<<<<< @@ 
-15987,7 +16050,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ (void)(memset((&__pyx_v_dst), 0, (sizeof(__pyx_v_dst)))); - /* "View.MemoryView":722 + /* "View.MemoryView":724 * cdef _memoryviewslice memviewsliceobj * * assert memview.view.ndim > 0 # <<<<<<<<<<<<<< @@ -15998,12 +16061,12 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ if (unlikely(!Py_OptimizeFlag)) { if (unlikely(!((__pyx_v_memview->view.ndim > 0) != 0))) { PyErr_SetNone(PyExc_AssertionError); - __PYX_ERR(1, 722, __pyx_L1_error) + __PYX_ERR(1, 724, __pyx_L1_error) } } #endif - /* "View.MemoryView":724 + /* "View.MemoryView":726 * assert memview.view.ndim > 0 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -16014,20 +16077,20 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":725 + /* "View.MemoryView":727 * * if isinstance(memview, _memoryviewslice): * memviewsliceobj = memview # <<<<<<<<<<<<<< * p_src = &memviewsliceobj.from_slice * else: */ - if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 725, __pyx_L1_error) + if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 727, __pyx_L1_error) __pyx_t_3 = ((PyObject *)__pyx_v_memview); __Pyx_INCREF(__pyx_t_3); __pyx_v_memviewsliceobj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":726 + /* "View.MemoryView":728 * if isinstance(memview, _memoryviewslice): * memviewsliceobj = memview * p_src = &memviewsliceobj.from_slice # <<<<<<<<<<<<<< @@ -16036,7 +16099,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ __pyx_v_p_src = (&__pyx_v_memviewsliceobj->from_slice); - /* 
"View.MemoryView":724 + /* "View.MemoryView":726 * assert memview.view.ndim > 0 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -16046,7 +16109,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ goto __pyx_L3; } - /* "View.MemoryView":728 + /* "View.MemoryView":730 * p_src = &memviewsliceobj.from_slice * else: * slice_copy(memview, &src) # <<<<<<<<<<<<<< @@ -16056,7 +16119,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ /*else*/ { __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_src)); - /* "View.MemoryView":729 + /* "View.MemoryView":731 * else: * slice_copy(memview, &src) * p_src = &src # <<<<<<<<<<<<<< @@ -16067,7 +16130,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ } __pyx_L3:; - /* "View.MemoryView":735 + /* "View.MemoryView":737 * * * dst.memview = p_src.memview # <<<<<<<<<<<<<< @@ -16077,7 +16140,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_4 = __pyx_v_p_src->memview; __pyx_v_dst.memview = __pyx_t_4; - /* "View.MemoryView":736 + /* "View.MemoryView":738 * * dst.memview = p_src.memview * dst.data = p_src.data # <<<<<<<<<<<<<< @@ -16087,7 +16150,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_5 = __pyx_v_p_src->data; __pyx_v_dst.data = __pyx_t_5; - /* "View.MemoryView":741 + /* "View.MemoryView":743 * * * cdef __Pyx_memviewslice *p_dst = &dst # <<<<<<<<<<<<<< @@ -16096,7 +16159,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ __pyx_v_p_dst = (&__pyx_v_dst); - /* "View.MemoryView":742 + /* "View.MemoryView":744 * * cdef __Pyx_memviewslice *p_dst = &dst * cdef int *p_suboffset_dim = &suboffset_dim # <<<<<<<<<<<<<< @@ -16105,7 +16168,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ __pyx_v_p_suboffset_dim = (&__pyx_v_suboffset_dim); - /* 
"View.MemoryView":746 + /* "View.MemoryView":748 * cdef bint have_start, have_stop, have_step * * for dim, index in enumerate(indices): # <<<<<<<<<<<<<< @@ -16117,26 +16180,26 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_3 = __pyx_v_indices; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0; __pyx_t_8 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_indices); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_indices); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 748, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_8 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 748, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_8)) { if (likely(PyList_CheckExact(__pyx_t_3))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_9 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_9 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(1, 748, __pyx_L1_error) #else - __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 748, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_9 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_9 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++; if (unlikely(0 
< 0)) __PYX_ERR(1, 748, __pyx_L1_error) #else - __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 746, __pyx_L1_error) + __pyx_t_9 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 748, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); #endif } @@ -16146,7 +16209,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(1, 746, __pyx_L1_error) + else __PYX_ERR(1, 748, __pyx_L1_error) } break; } @@ -16157,7 +16220,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_v_dim = __pyx_t_6; __pyx_t_6 = (__pyx_t_6 + 1); - /* "View.MemoryView":747 + /* "View.MemoryView":749 * * for dim, index in enumerate(indices): * if PyIndex_Check(index): # <<<<<<<<<<<<<< @@ -16167,25 +16230,25 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_2 = (PyIndex_Check(__pyx_v_index) != 0); if (__pyx_t_2) { - /* "View.MemoryView":751 + /* "View.MemoryView":753 * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], * dim, new_ndim, p_suboffset_dim, * index, 0, 0, # start, stop, step # <<<<<<<<<<<<<< * 0, 0, 0, # have_{start,stop,step} * False) */ - __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_index); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 751, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_index); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 753, __pyx_L1_error) - /* "View.MemoryView":748 + /* "View.MemoryView":750 * for dim, index in enumerate(indices): * if PyIndex_Check(index): * slice_memviewslice( # <<<<<<<<<<<<<< * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], * dim, new_ndim, p_suboffset_dim, */ - __pyx_t_11 = 
__pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_t_10, 0, 0, 0, 0, 0, 0); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(1, 748, __pyx_L1_error) + __pyx_t_11 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_t_10, 0, 0, 0, 0, 0, 0); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(1, 750, __pyx_L1_error) - /* "View.MemoryView":747 + /* "View.MemoryView":749 * * for dim, index in enumerate(indices): * if PyIndex_Check(index): # <<<<<<<<<<<<<< @@ -16195,7 +16258,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ goto __pyx_L6; } - /* "View.MemoryView":754 + /* "View.MemoryView":756 * 0, 0, 0, # have_{start,stop,step} * False) * elif index is None: # <<<<<<<<<<<<<< @@ -16206,7 +16269,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { - /* "View.MemoryView":755 + /* "View.MemoryView":757 * False) * elif index is None: * p_dst.shape[new_ndim] = 1 # <<<<<<<<<<<<<< @@ -16215,7 +16278,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ (__pyx_v_p_dst->shape[__pyx_v_new_ndim]) = 1; - /* "View.MemoryView":756 + /* "View.MemoryView":758 * elif index is None: * p_dst.shape[new_ndim] = 1 * p_dst.strides[new_ndim] = 0 # <<<<<<<<<<<<<< @@ -16224,7 +16287,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ (__pyx_v_p_dst->strides[__pyx_v_new_ndim]) = 0; - /* "View.MemoryView":757 + /* "View.MemoryView":759 * p_dst.shape[new_ndim] = 1 * p_dst.strides[new_ndim] = 0 * p_dst.suboffsets[new_ndim] = -1 # <<<<<<<<<<<<<< @@ -16233,7 +16296,7 
@@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ (__pyx_v_p_dst->suboffsets[__pyx_v_new_ndim]) = -1L; - /* "View.MemoryView":758 + /* "View.MemoryView":760 * p_dst.strides[new_ndim] = 0 * p_dst.suboffsets[new_ndim] = -1 * new_ndim += 1 # <<<<<<<<<<<<<< @@ -16242,7 +16305,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ __pyx_v_new_ndim = (__pyx_v_new_ndim + 1); - /* "View.MemoryView":754 + /* "View.MemoryView":756 * 0, 0, 0, # have_{start,stop,step} * False) * elif index is None: # <<<<<<<<<<<<<< @@ -16252,7 +16315,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ goto __pyx_L6; } - /* "View.MemoryView":760 + /* "View.MemoryView":762 * new_ndim += 1 * else: * start = index.start or 0 # <<<<<<<<<<<<<< @@ -16260,13 +16323,13 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ * step = index.step or 0 */ /*else*/ { - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 760, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 762, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 760, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 762, __pyx_L1_error) if (!__pyx_t_1) { __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } else { - __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 760, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 762, __pyx_L1_error) __pyx_t_10 = __pyx_t_12; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L7_bool_binop_done; @@ -16275,20 +16338,20 @@ static struct 
__pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_L7_bool_binop_done:; __pyx_v_start = __pyx_t_10; - /* "View.MemoryView":761 + /* "View.MemoryView":763 * else: * start = index.start or 0 * stop = index.stop or 0 # <<<<<<<<<<<<<< * step = index.step or 0 * */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 761, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 763, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 761, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 763, __pyx_L1_error) if (!__pyx_t_1) { __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } else { - __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 761, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 763, __pyx_L1_error) __pyx_t_10 = __pyx_t_12; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L9_bool_binop_done; @@ -16297,20 +16360,20 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_L9_bool_binop_done:; __pyx_v_stop = __pyx_t_10; - /* "View.MemoryView":762 + /* "View.MemoryView":764 * start = index.start or 0 * stop = index.stop or 0 * step = index.step or 0 # <<<<<<<<<<<<<< * * have_start = index.start is not None */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 762, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 764, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 762, 
__pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(1, 764, __pyx_L1_error) if (!__pyx_t_1) { __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } else { - __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 762, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyIndex_AsSsize_t(__pyx_t_9); if (unlikely((__pyx_t_12 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 764, __pyx_L1_error) __pyx_t_10 = __pyx_t_12; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; goto __pyx_L11_bool_binop_done; @@ -16319,55 +16382,55 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_L11_bool_binop_done:; __pyx_v_step = __pyx_t_10; - /* "View.MemoryView":764 + /* "View.MemoryView":766 * step = index.step or 0 * * have_start = index.start is not None # <<<<<<<<<<<<<< * have_stop = index.stop is not None * have_step = index.step is not None */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 764, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_start); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 766, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __pyx_t_1 = (__pyx_t_9 != Py_None); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_v_have_start = __pyx_t_1; - /* "View.MemoryView":765 + /* "View.MemoryView":767 * * have_start = index.start is not None * have_stop = index.stop is not None # <<<<<<<<<<<<<< * have_step = index.step is not None * */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 765, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_stop); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 767, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __pyx_t_1 = (__pyx_t_9 != Py_None); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_v_have_stop = __pyx_t_1; - /* "View.MemoryView":766 + /* 
"View.MemoryView":768 * have_start = index.start is not None * have_stop = index.stop is not None * have_step = index.step is not None # <<<<<<<<<<<<<< * * slice_memviewslice( */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 766, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_index, __pyx_n_s_step); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 768, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __pyx_t_1 = (__pyx_t_9 != Py_None); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_v_have_step = __pyx_t_1; - /* "View.MemoryView":768 + /* "View.MemoryView":770 * have_step = index.step is not None * * slice_memviewslice( # <<<<<<<<<<<<<< * p_dst, p_src.shape[dim], p_src.strides[dim], p_src.suboffsets[dim], * dim, new_ndim, p_suboffset_dim, */ - __pyx_t_11 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_v_start, __pyx_v_stop, __pyx_v_step, __pyx_v_have_start, __pyx_v_have_stop, __pyx_v_have_step, 1); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(1, 768, __pyx_L1_error) + __pyx_t_11 = __pyx_memoryview_slice_memviewslice(__pyx_v_p_dst, (__pyx_v_p_src->shape[__pyx_v_dim]), (__pyx_v_p_src->strides[__pyx_v_dim]), (__pyx_v_p_src->suboffsets[__pyx_v_dim]), __pyx_v_dim, __pyx_v_new_ndim, __pyx_v_p_suboffset_dim, __pyx_v_start, __pyx_v_stop, __pyx_v_step, __pyx_v_have_start, __pyx_v_have_stop, __pyx_v_have_step, 1); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(1, 770, __pyx_L1_error) - /* "View.MemoryView":774 + /* "View.MemoryView":776 * have_start, have_stop, have_step, * True) * new_ndim += 1 # <<<<<<<<<<<<<< @@ -16378,7 +16441,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ } __pyx_L6:; - /* "View.MemoryView":746 + /* "View.MemoryView":748 * cdef bint have_start, have_stop, have_step * 
* for dim, index in enumerate(indices): # <<<<<<<<<<<<<< @@ -16388,7 +16451,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ } __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":776 + /* "View.MemoryView":778 * new_ndim += 1 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -16399,7 +16462,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":777 + /* "View.MemoryView":779 * * if isinstance(memview, _memoryviewslice): * return memoryview_fromslice(dst, new_ndim, # <<<<<<<<<<<<<< @@ -16408,39 +16471,39 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ __Pyx_XDECREF(((PyObject *)__pyx_r)); - /* "View.MemoryView":778 + /* "View.MemoryView":780 * if isinstance(memview, _memoryviewslice): * return memoryview_fromslice(dst, new_ndim, * memviewsliceobj.to_object_func, # <<<<<<<<<<<<<< * memviewsliceobj.to_dtype_func, * memview.dtype_is_object) */ - if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 778, __pyx_L1_error) } + if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 780, __pyx_L1_error) } - /* "View.MemoryView":779 + /* "View.MemoryView":781 * return memoryview_fromslice(dst, new_ndim, * memviewsliceobj.to_object_func, * memviewsliceobj.to_dtype_func, # <<<<<<<<<<<<<< * memview.dtype_is_object) * else: */ - if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 779, __pyx_L1_error) } + if (unlikely(!__pyx_v_memviewsliceobj)) { __Pyx_RaiseUnboundLocalError("memviewsliceobj"); __PYX_ERR(1, 781, __pyx_L1_error) } - /* "View.MemoryView":777 + /* "View.MemoryView":779 * * if isinstance(memview, _memoryviewslice): * return memoryview_fromslice(dst, new_ndim, # <<<<<<<<<<<<<< * memviewsliceobj.to_object_func, * 
memviewsliceobj.to_dtype_func, */ - __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, __pyx_v_memviewsliceobj->to_object_func, __pyx_v_memviewsliceobj->to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 777, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, __pyx_v_memviewsliceobj->to_object_func, __pyx_v_memviewsliceobj->to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 779, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(1, 777, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(1, 779, __pyx_L1_error) __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L0; - /* "View.MemoryView":776 + /* "View.MemoryView":778 * new_ndim += 1 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -16449,7 +16512,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ */ } - /* "View.MemoryView":782 + /* "View.MemoryView":784 * memview.dtype_is_object) * else: * return memoryview_fromslice(dst, new_ndim, NULL, NULL, # <<<<<<<<<<<<<< @@ -16459,30 +16522,30 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ /*else*/ { __Pyx_XDECREF(((PyObject *)__pyx_r)); - /* "View.MemoryView":783 + /* "View.MemoryView":785 * else: * return memoryview_fromslice(dst, new_ndim, NULL, NULL, * memview.dtype_is_object) # <<<<<<<<<<<<<< * * */ - __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, NULL, NULL, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 782, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_fromslice(__pyx_v_dst, __pyx_v_new_ndim, NULL, NULL, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 784, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - /* "View.MemoryView":782 + /* "View.MemoryView":784 * memview.dtype_is_object) * else: * return memoryview_fromslice(dst, new_ndim, NULL, NULL, # <<<<<<<<<<<<<< * memview.dtype_is_object) * */ - if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(1, 782, __pyx_L1_error) + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_memoryview_type))))) __PYX_ERR(1, 784, __pyx_L1_error) __pyx_r = ((struct __pyx_memoryview_obj *)__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L0; } - /* "View.MemoryView":710 + /* "View.MemoryView":712 * * @cname('__pyx_memview_slice') * cdef memoryview memview_slice(memoryview memview, object indices): # <<<<<<<<<<<<<< @@ -16504,7 +16567,7 @@ static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_ return __pyx_r; } -/* "View.MemoryView":807 +/* "View.MemoryView":809 * * @cname('__pyx_memoryview_slice_memviewslice') * cdef int slice_memviewslice( # <<<<<<<<<<<<<< @@ -16523,7 +16586,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, const char *__pyx_filename = NULL; int __pyx_clineno = 0; - /* "View.MemoryView":827 + /* "View.MemoryView":829 * cdef bint negative_step * * if not is_slice: # <<<<<<<<<<<<<< @@ -16533,7 +16596,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_1 = ((!(__pyx_v_is_slice != 0)) != 0); if (__pyx_t_1) { - /* "View.MemoryView":829 + /* "View.MemoryView":831 * if not is_slice: * * if start < 0: # <<<<<<<<<<<<<< @@ -16543,7 +16606,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_1 = ((__pyx_v_start < 0) != 0); if (__pyx_t_1) { - /* "View.MemoryView":830 + /* "View.MemoryView":832 * * if start < 0: * start += shape # <<<<<<<<<<<<<< @@ -16552,7 +16615,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_start = 
(__pyx_v_start + __pyx_v_shape); - /* "View.MemoryView":829 + /* "View.MemoryView":831 * if not is_slice: * * if start < 0: # <<<<<<<<<<<<<< @@ -16561,7 +16624,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":831 + /* "View.MemoryView":833 * if start < 0: * start += shape * if not 0 <= start < shape: # <<<<<<<<<<<<<< @@ -16575,16 +16638,16 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((!(__pyx_t_1 != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":832 + /* "View.MemoryView":834 * start += shape * if not 0 <= start < shape: * _err_dim(IndexError, "Index out of bounds (axis %d)", dim) # <<<<<<<<<<<<<< * else: * */ - __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"Index out of bounds (axis %d)"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 832, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"Index out of bounds (axis %d)"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 834, __pyx_L1_error) - /* "View.MemoryView":831 + /* "View.MemoryView":833 * if start < 0: * start += shape * if not 0 <= start < shape: # <<<<<<<<<<<<<< @@ -16593,7 +16656,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":827 + /* "View.MemoryView":829 * cdef bint negative_step * * if not is_slice: # <<<<<<<<<<<<<< @@ -16603,7 +16666,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L3; } - /* "View.MemoryView":835 + /* "View.MemoryView":837 * else: * * negative_step = have_step != 0 and step < 0 # <<<<<<<<<<<<<< @@ -16622,7 +16685,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_L6_bool_binop_done:; __pyx_v_negative_step = __pyx_t_2; - /* "View.MemoryView":837 + /* "View.MemoryView":839 * negative_step = have_step != 0 and step < 0 
* * if have_step and step == 0: # <<<<<<<<<<<<<< @@ -16640,16 +16703,16 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_L9_bool_binop_done:; if (__pyx_t_2) { - /* "View.MemoryView":838 + /* "View.MemoryView":840 * * if have_step and step == 0: * _err_dim(ValueError, "Step may not be zero (axis %d)", dim) # <<<<<<<<<<<<<< * * */ - __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Step may not be zero (axis %d)"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 838, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Step may not be zero (axis %d)"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 840, __pyx_L1_error) - /* "View.MemoryView":837 + /* "View.MemoryView":839 * negative_step = have_step != 0 and step < 0 * * if have_step and step == 0: # <<<<<<<<<<<<<< @@ -16658,7 +16721,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":841 + /* "View.MemoryView":843 * * * if have_start: # <<<<<<<<<<<<<< @@ -16668,7 +16731,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (__pyx_v_have_start != 0); if (__pyx_t_2) { - /* "View.MemoryView":842 + /* "View.MemoryView":844 * * if have_start: * if start < 0: # <<<<<<<<<<<<<< @@ -16678,7 +16741,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_start < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":843 + /* "View.MemoryView":845 * if have_start: * if start < 0: * start += shape # <<<<<<<<<<<<<< @@ -16687,7 +16750,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_start = (__pyx_v_start + __pyx_v_shape); - /* "View.MemoryView":844 + /* "View.MemoryView":846 * if start < 0: * start += shape * if start < 0: # <<<<<<<<<<<<<< @@ -16697,7 +16760,7 @@ static int 
__pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_start < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":845 + /* "View.MemoryView":847 * start += shape * if start < 0: * start = 0 # <<<<<<<<<<<<<< @@ -16706,7 +16769,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_start = 0; - /* "View.MemoryView":844 + /* "View.MemoryView":846 * if start < 0: * start += shape * if start < 0: # <<<<<<<<<<<<<< @@ -16715,7 +16778,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":842 + /* "View.MemoryView":844 * * if have_start: * if start < 0: # <<<<<<<<<<<<<< @@ -16725,7 +16788,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L12; } - /* "View.MemoryView":846 + /* "View.MemoryView":848 * if start < 0: * start = 0 * elif start >= shape: # <<<<<<<<<<<<<< @@ -16735,7 +16798,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_start >= __pyx_v_shape) != 0); if (__pyx_t_2) { - /* "View.MemoryView":847 + /* "View.MemoryView":849 * start = 0 * elif start >= shape: * if negative_step: # <<<<<<<<<<<<<< @@ -16745,7 +16808,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (__pyx_v_negative_step != 0); if (__pyx_t_2) { - /* "View.MemoryView":848 + /* "View.MemoryView":850 * elif start >= shape: * if negative_step: * start = shape - 1 # <<<<<<<<<<<<<< @@ -16754,7 +16817,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_start = (__pyx_v_shape - 1); - /* "View.MemoryView":847 + /* "View.MemoryView":849 * start = 0 * elif start >= shape: * if negative_step: # <<<<<<<<<<<<<< @@ -16764,7 +16827,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L14; } - /* "View.MemoryView":850 + /* "View.MemoryView":852 * start = 
shape - 1 * else: * start = shape # <<<<<<<<<<<<<< @@ -16776,7 +16839,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L14:; - /* "View.MemoryView":846 + /* "View.MemoryView":848 * if start < 0: * start = 0 * elif start >= shape: # <<<<<<<<<<<<<< @@ -16786,7 +16849,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L12:; - /* "View.MemoryView":841 + /* "View.MemoryView":843 * * * if have_start: # <<<<<<<<<<<<<< @@ -16796,7 +16859,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L11; } - /* "View.MemoryView":852 + /* "View.MemoryView":854 * start = shape * else: * if negative_step: # <<<<<<<<<<<<<< @@ -16807,7 +16870,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (__pyx_v_negative_step != 0); if (__pyx_t_2) { - /* "View.MemoryView":853 + /* "View.MemoryView":855 * else: * if negative_step: * start = shape - 1 # <<<<<<<<<<<<<< @@ -16816,7 +16879,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_start = (__pyx_v_shape - 1); - /* "View.MemoryView":852 + /* "View.MemoryView":854 * start = shape * else: * if negative_step: # <<<<<<<<<<<<<< @@ -16826,7 +16889,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L15; } - /* "View.MemoryView":855 + /* "View.MemoryView":857 * start = shape - 1 * else: * start = 0 # <<<<<<<<<<<<<< @@ -16840,7 +16903,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L11:; - /* "View.MemoryView":857 + /* "View.MemoryView":859 * start = 0 * * if have_stop: # <<<<<<<<<<<<<< @@ -16850,7 +16913,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (__pyx_v_have_stop != 0); if (__pyx_t_2) { - /* "View.MemoryView":858 + /* "View.MemoryView":860 * * if have_stop: * if stop < 0: # <<<<<<<<<<<<<< @@ 
-16860,7 +16923,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_stop < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":859 + /* "View.MemoryView":861 * if have_stop: * if stop < 0: * stop += shape # <<<<<<<<<<<<<< @@ -16869,7 +16932,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_stop = (__pyx_v_stop + __pyx_v_shape); - /* "View.MemoryView":860 + /* "View.MemoryView":862 * if stop < 0: * stop += shape * if stop < 0: # <<<<<<<<<<<<<< @@ -16879,7 +16942,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_stop < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":861 + /* "View.MemoryView":863 * stop += shape * if stop < 0: * stop = 0 # <<<<<<<<<<<<<< @@ -16888,7 +16951,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_stop = 0; - /* "View.MemoryView":860 + /* "View.MemoryView":862 * if stop < 0: * stop += shape * if stop < 0: # <<<<<<<<<<<<<< @@ -16897,7 +16960,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":858 + /* "View.MemoryView":860 * * if have_stop: * if stop < 0: # <<<<<<<<<<<<<< @@ -16907,7 +16970,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L17; } - /* "View.MemoryView":862 + /* "View.MemoryView":864 * if stop < 0: * stop = 0 * elif stop > shape: # <<<<<<<<<<<<<< @@ -16917,7 +16980,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_stop > __pyx_v_shape) != 0); if (__pyx_t_2) { - /* "View.MemoryView":863 + /* "View.MemoryView":865 * stop = 0 * elif stop > shape: * stop = shape # <<<<<<<<<<<<<< @@ -16926,7 +16989,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_stop = __pyx_v_shape; - /* "View.MemoryView":862 + /* "View.MemoryView":864 * if 
stop < 0: * stop = 0 * elif stop > shape: # <<<<<<<<<<<<<< @@ -16936,7 +16999,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L17:; - /* "View.MemoryView":857 + /* "View.MemoryView":859 * start = 0 * * if have_stop: # <<<<<<<<<<<<<< @@ -16946,7 +17009,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L16; } - /* "View.MemoryView":865 + /* "View.MemoryView":867 * stop = shape * else: * if negative_step: # <<<<<<<<<<<<<< @@ -16957,7 +17020,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (__pyx_v_negative_step != 0); if (__pyx_t_2) { - /* "View.MemoryView":866 + /* "View.MemoryView":868 * else: * if negative_step: * stop = -1 # <<<<<<<<<<<<<< @@ -16966,7 +17029,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_stop = -1L; - /* "View.MemoryView":865 + /* "View.MemoryView":867 * stop = shape * else: * if negative_step: # <<<<<<<<<<<<<< @@ -16976,7 +17039,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L19; } - /* "View.MemoryView":868 + /* "View.MemoryView":870 * stop = -1 * else: * stop = shape # <<<<<<<<<<<<<< @@ -16990,7 +17053,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L16:; - /* "View.MemoryView":870 + /* "View.MemoryView":872 * stop = shape * * if not have_step: # <<<<<<<<<<<<<< @@ -17000,7 +17063,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((!(__pyx_v_have_step != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":871 + /* "View.MemoryView":873 * * if not have_step: * step = 1 # <<<<<<<<<<<<<< @@ -17009,7 +17072,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_step = 1; - /* "View.MemoryView":870 + /* "View.MemoryView":872 * stop = shape * * if not have_step: # <<<<<<<<<<<<<< @@ 
-17018,7 +17081,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":875 + /* "View.MemoryView":877 * * with cython.cdivision(True): * new_shape = (stop - start) // step # <<<<<<<<<<<<<< @@ -17027,7 +17090,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_new_shape = ((__pyx_v_stop - __pyx_v_start) / __pyx_v_step); - /* "View.MemoryView":877 + /* "View.MemoryView":879 * new_shape = (stop - start) // step * * if (stop - start) - step * new_shape: # <<<<<<<<<<<<<< @@ -17037,7 +17100,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (((__pyx_v_stop - __pyx_v_start) - (__pyx_v_step * __pyx_v_new_shape)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":878 + /* "View.MemoryView":880 * * if (stop - start) - step * new_shape: * new_shape += 1 # <<<<<<<<<<<<<< @@ -17046,7 +17109,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_new_shape = (__pyx_v_new_shape + 1); - /* "View.MemoryView":877 + /* "View.MemoryView":879 * new_shape = (stop - start) // step * * if (stop - start) - step * new_shape: # <<<<<<<<<<<<<< @@ -17055,7 +17118,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":880 + /* "View.MemoryView":882 * new_shape += 1 * * if new_shape < 0: # <<<<<<<<<<<<<< @@ -17065,7 +17128,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_new_shape < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":881 + /* "View.MemoryView":883 * * if new_shape < 0: * new_shape = 0 # <<<<<<<<<<<<<< @@ -17074,7 +17137,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_new_shape = 0; - /* "View.MemoryView":880 + /* "View.MemoryView":882 * new_shape += 1 * * if new_shape < 0: # <<<<<<<<<<<<<< @@ -17083,7 +17146,7 @@ static int 
__pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":884 + /* "View.MemoryView":886 * * * dst.strides[new_ndim] = stride * step # <<<<<<<<<<<<<< @@ -17092,7 +17155,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ (__pyx_v_dst->strides[__pyx_v_new_ndim]) = (__pyx_v_stride * __pyx_v_step); - /* "View.MemoryView":885 + /* "View.MemoryView":887 * * dst.strides[new_ndim] = stride * step * dst.shape[new_ndim] = new_shape # <<<<<<<<<<<<<< @@ -17101,7 +17164,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ (__pyx_v_dst->shape[__pyx_v_new_ndim]) = __pyx_v_new_shape; - /* "View.MemoryView":886 + /* "View.MemoryView":888 * dst.strides[new_ndim] = stride * step * dst.shape[new_ndim] = new_shape * dst.suboffsets[new_ndim] = suboffset # <<<<<<<<<<<<<< @@ -17112,7 +17175,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L3:; - /* "View.MemoryView":889 + /* "View.MemoryView":891 * * * if suboffset_dim[0] < 0: # <<<<<<<<<<<<<< @@ -17122,7 +17185,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = (((__pyx_v_suboffset_dim[0]) < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":890 + /* "View.MemoryView":892 * * if suboffset_dim[0] < 0: * dst.data += start * stride # <<<<<<<<<<<<<< @@ -17131,7 +17194,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_dst->data = (__pyx_v_dst->data + (__pyx_v_start * __pyx_v_stride)); - /* "View.MemoryView":889 + /* "View.MemoryView":891 * * * if suboffset_dim[0] < 0: # <<<<<<<<<<<<<< @@ -17141,7 +17204,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L23; } - /* "View.MemoryView":892 + /* "View.MemoryView":894 * dst.data += start * stride * else: * dst.suboffsets[suboffset_dim[0]] += start * stride # <<<<<<<<<<<<<< @@ -17154,7 +17217,7 @@ static int 
__pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L23:; - /* "View.MemoryView":894 + /* "View.MemoryView":896 * dst.suboffsets[suboffset_dim[0]] += start * stride * * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -17164,7 +17227,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_suboffset >= 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":895 + /* "View.MemoryView":897 * * if suboffset >= 0: * if not is_slice: # <<<<<<<<<<<<<< @@ -17174,7 +17237,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((!(__pyx_v_is_slice != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":896 + /* "View.MemoryView":898 * if suboffset >= 0: * if not is_slice: * if new_ndim == 0: # <<<<<<<<<<<<<< @@ -17184,7 +17247,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_t_2 = ((__pyx_v_new_ndim == 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":897 + /* "View.MemoryView":899 * if not is_slice: * if new_ndim == 0: * dst.data = ( dst.data)[0] + suboffset # <<<<<<<<<<<<<< @@ -17193,7 +17256,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ __pyx_v_dst->data = ((((char **)__pyx_v_dst->data)[0]) + __pyx_v_suboffset); - /* "View.MemoryView":896 + /* "View.MemoryView":898 * if suboffset >= 0: * if not is_slice: * if new_ndim == 0: # <<<<<<<<<<<<<< @@ -17203,7 +17266,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L26; } - /* "View.MemoryView":899 + /* "View.MemoryView":901 * dst.data = ( dst.data)[0] + suboffset * else: * _err_dim(IndexError, "All dimensions preceding dimension %d " # <<<<<<<<<<<<<< @@ -17212,18 +17275,18 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ /*else*/ { - /* "View.MemoryView":900 + /* "View.MemoryView":902 * else: * _err_dim(IndexError, "All dimensions preceding dimension %d " * "must be 
indexed and not sliced", dim) # <<<<<<<<<<<<<< * else: * suboffset_dim[0] = new_ndim */ - __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"All dimensions preceding dimension %d must be indexed and not sliced"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 899, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_err_dim(__pyx_builtin_IndexError, ((char *)"All dimensions preceding dimension %d must be indexed and not sliced"), __pyx_v_dim); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 901, __pyx_L1_error) } __pyx_L26:; - /* "View.MemoryView":895 + /* "View.MemoryView":897 * * if suboffset >= 0: * if not is_slice: # <<<<<<<<<<<<<< @@ -17233,7 +17296,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, goto __pyx_L25; } - /* "View.MemoryView":902 + /* "View.MemoryView":904 * "must be indexed and not sliced", dim) * else: * suboffset_dim[0] = new_ndim # <<<<<<<<<<<<<< @@ -17245,7 +17308,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, } __pyx_L25:; - /* "View.MemoryView":894 + /* "View.MemoryView":896 * dst.suboffsets[suboffset_dim[0]] += start * stride * * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -17254,7 +17317,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, */ } - /* "View.MemoryView":904 + /* "View.MemoryView":906 * suboffset_dim[0] = new_ndim * * return 0 # <<<<<<<<<<<<<< @@ -17264,7 +17327,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, __pyx_r = 0; goto __pyx_L0; - /* "View.MemoryView":807 + /* "View.MemoryView":809 * * @cname('__pyx_memoryview_slice_memviewslice') * cdef int slice_memviewslice( # <<<<<<<<<<<<<< @@ -17288,7 +17351,7 @@ static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *__pyx_v_dst, return __pyx_r; } -/* "View.MemoryView":910 +/* "View.MemoryView":912 * * @cname('__pyx_pybuffer_index') * cdef char *pybuffer_index(Py_buffer *view, char *bufp, 
Py_ssize_t index, # <<<<<<<<<<<<<< @@ -17313,7 +17376,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P int __pyx_clineno = 0; __Pyx_RefNannySetupContext("pybuffer_index", 0); - /* "View.MemoryView":912 + /* "View.MemoryView":914 * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index, * Py_ssize_t dim) except NULL: * cdef Py_ssize_t shape, stride, suboffset = -1 # <<<<<<<<<<<<<< @@ -17322,7 +17385,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_suboffset = -1L; - /* "View.MemoryView":913 + /* "View.MemoryView":915 * Py_ssize_t dim) except NULL: * cdef Py_ssize_t shape, stride, suboffset = -1 * cdef Py_ssize_t itemsize = view.itemsize # <<<<<<<<<<<<<< @@ -17332,7 +17395,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_1 = __pyx_v_view->itemsize; __pyx_v_itemsize = __pyx_t_1; - /* "View.MemoryView":916 + /* "View.MemoryView":918 * cdef char *resultp * * if view.ndim == 0: # <<<<<<<<<<<<<< @@ -17342,7 +17405,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_view->ndim == 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":917 + /* "View.MemoryView":919 * * if view.ndim == 0: * shape = view.len / itemsize # <<<<<<<<<<<<<< @@ -17351,15 +17414,15 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ if (unlikely(__pyx_v_itemsize == 0)) { PyErr_SetString(PyExc_ZeroDivisionError, "integer division or modulo by zero"); - __PYX_ERR(1, 917, __pyx_L1_error) + __PYX_ERR(1, 919, __pyx_L1_error) } else if (sizeof(Py_ssize_t) == sizeof(long) && (!(((Py_ssize_t)-1) > 0)) && unlikely(__pyx_v_itemsize == (Py_ssize_t)-1) && unlikely(UNARY_NEG_WOULD_OVERFLOW(__pyx_v_view->len))) { PyErr_SetString(PyExc_OverflowError, "value too large to perform division"); - __PYX_ERR(1, 917, __pyx_L1_error) + __PYX_ERR(1, 919, __pyx_L1_error) } __pyx_v_shape = 
__Pyx_div_Py_ssize_t(__pyx_v_view->len, __pyx_v_itemsize); - /* "View.MemoryView":918 + /* "View.MemoryView":920 * if view.ndim == 0: * shape = view.len / itemsize * stride = itemsize # <<<<<<<<<<<<<< @@ -17368,7 +17431,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_stride = __pyx_v_itemsize; - /* "View.MemoryView":916 + /* "View.MemoryView":918 * cdef char *resultp * * if view.ndim == 0: # <<<<<<<<<<<<<< @@ -17378,7 +17441,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P goto __pyx_L3; } - /* "View.MemoryView":920 + /* "View.MemoryView":922 * stride = itemsize * else: * shape = view.shape[dim] # <<<<<<<<<<<<<< @@ -17388,7 +17451,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P /*else*/ { __pyx_v_shape = (__pyx_v_view->shape[__pyx_v_dim]); - /* "View.MemoryView":921 + /* "View.MemoryView":923 * else: * shape = view.shape[dim] * stride = view.strides[dim] # <<<<<<<<<<<<<< @@ -17397,7 +17460,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_stride = (__pyx_v_view->strides[__pyx_v_dim]); - /* "View.MemoryView":922 + /* "View.MemoryView":924 * shape = view.shape[dim] * stride = view.strides[dim] * if view.suboffsets != NULL: # <<<<<<<<<<<<<< @@ -17407,7 +17470,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_view->suboffsets != NULL) != 0); if (__pyx_t_2) { - /* "View.MemoryView":923 + /* "View.MemoryView":925 * stride = view.strides[dim] * if view.suboffsets != NULL: * suboffset = view.suboffsets[dim] # <<<<<<<<<<<<<< @@ -17416,7 +17479,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_suboffset = (__pyx_v_view->suboffsets[__pyx_v_dim]); - /* "View.MemoryView":922 + /* "View.MemoryView":924 * shape = view.shape[dim] * stride = view.strides[dim] * if view.suboffsets != NULL: # <<<<<<<<<<<<<< @@ 
-17427,7 +17490,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P } __pyx_L3:; - /* "View.MemoryView":925 + /* "View.MemoryView":927 * suboffset = view.suboffsets[dim] * * if index < 0: # <<<<<<<<<<<<<< @@ -17437,7 +17500,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_index < 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":926 + /* "View.MemoryView":928 * * if index < 0: * index += view.shape[dim] # <<<<<<<<<<<<<< @@ -17446,7 +17509,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_index = (__pyx_v_index + (__pyx_v_view->shape[__pyx_v_dim])); - /* "View.MemoryView":927 + /* "View.MemoryView":929 * if index < 0: * index += view.shape[dim] * if index < 0: # <<<<<<<<<<<<<< @@ -17456,26 +17519,26 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_index < 0) != 0); if (unlikely(__pyx_t_2)) { - /* "View.MemoryView":928 + /* "View.MemoryView":930 * index += view.shape[dim] * if index < 0: * raise IndexError("Out of bounds on buffer access (axis %d)" % dim) # <<<<<<<<<<<<<< * * if index >= shape: */ - __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 928, __pyx_L1_error) + __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 930, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 928, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 930, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 928, __pyx_L1_error) + __pyx_t_3 = 
__Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 930, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 928, __pyx_L1_error) + __PYX_ERR(1, 930, __pyx_L1_error) - /* "View.MemoryView":927 + /* "View.MemoryView":929 * if index < 0: * index += view.shape[dim] * if index < 0: # <<<<<<<<<<<<<< @@ -17484,7 +17547,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ } - /* "View.MemoryView":925 + /* "View.MemoryView":927 * suboffset = view.suboffsets[dim] * * if index < 0: # <<<<<<<<<<<<<< @@ -17493,7 +17556,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ } - /* "View.MemoryView":930 + /* "View.MemoryView":932 * raise IndexError("Out of bounds on buffer access (axis %d)" % dim) * * if index >= shape: # <<<<<<<<<<<<<< @@ -17503,26 +17566,26 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_index >= __pyx_v_shape) != 0); if (unlikely(__pyx_t_2)) { - /* "View.MemoryView":931 + /* "View.MemoryView":933 * * if index >= shape: * raise IndexError("Out of bounds on buffer access (axis %d)" % dim) # <<<<<<<<<<<<<< * * resultp = bufp + index * stride */ - __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 931, __pyx_L1_error) + __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 933, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 931, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Out_of_bounds_on_buffer_access_a, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 933, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_3 = 
__Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 931, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_builtin_IndexError, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 933, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __PYX_ERR(1, 931, __pyx_L1_error) + __PYX_ERR(1, 933, __pyx_L1_error) - /* "View.MemoryView":930 + /* "View.MemoryView":932 * raise IndexError("Out of bounds on buffer access (axis %d)" % dim) * * if index >= shape: # <<<<<<<<<<<<<< @@ -17531,7 +17594,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ } - /* "View.MemoryView":933 + /* "View.MemoryView":935 * raise IndexError("Out of bounds on buffer access (axis %d)" % dim) * * resultp = bufp + index * stride # <<<<<<<<<<<<<< @@ -17540,7 +17603,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_resultp = (__pyx_v_bufp + (__pyx_v_index * __pyx_v_stride)); - /* "View.MemoryView":934 + /* "View.MemoryView":936 * * resultp = bufp + index * stride * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -17550,7 +17613,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_t_2 = ((__pyx_v_suboffset >= 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":935 + /* "View.MemoryView":937 * resultp = bufp + index * stride * if suboffset >= 0: * resultp = ( resultp)[0] + suboffset # <<<<<<<<<<<<<< @@ -17559,7 +17622,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ __pyx_v_resultp = ((((char **)__pyx_v_resultp)[0]) + __pyx_v_suboffset); - /* "View.MemoryView":934 + /* "View.MemoryView":936 * * resultp = bufp + index * stride * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -17568,7 +17631,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P */ } - /* 
"View.MemoryView":937 + /* "View.MemoryView":939 * resultp = ( resultp)[0] + suboffset * * return resultp # <<<<<<<<<<<<<< @@ -17578,7 +17641,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P __pyx_r = __pyx_v_resultp; goto __pyx_L0; - /* "View.MemoryView":910 + /* "View.MemoryView":912 * * @cname('__pyx_pybuffer_index') * cdef char *pybuffer_index(Py_buffer *view, char *bufp, Py_ssize_t index, # <<<<<<<<<<<<<< @@ -17597,7 +17660,7 @@ static char *__pyx_pybuffer_index(Py_buffer *__pyx_v_view, char *__pyx_v_bufp, P return __pyx_r; } -/* "View.MemoryView":943 +/* "View.MemoryView":945 * * @cname('__pyx_memslice_transpose') * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0: # <<<<<<<<<<<<<< @@ -17625,7 +17688,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { const char *__pyx_filename = NULL; int __pyx_clineno = 0; - /* "View.MemoryView":944 + /* "View.MemoryView":946 * @cname('__pyx_memslice_transpose') * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0: * cdef int ndim = memslice.memview.view.ndim # <<<<<<<<<<<<<< @@ -17635,7 +17698,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { __pyx_t_1 = __pyx_v_memslice->memview->view.ndim; __pyx_v_ndim = __pyx_t_1; - /* "View.MemoryView":946 + /* "View.MemoryView":948 * cdef int ndim = memslice.memview.view.ndim * * cdef Py_ssize_t *shape = memslice.shape # <<<<<<<<<<<<<< @@ -17645,7 +17708,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { __pyx_t_2 = __pyx_v_memslice->shape; __pyx_v_shape = __pyx_t_2; - /* "View.MemoryView":947 + /* "View.MemoryView":949 * * cdef Py_ssize_t *shape = memslice.shape * cdef Py_ssize_t *strides = memslice.strides # <<<<<<<<<<<<<< @@ -17655,7 +17718,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { __pyx_t_2 = __pyx_v_memslice->strides; __pyx_v_strides = __pyx_t_2; - /* 
"View.MemoryView":951 + /* "View.MemoryView":953 * * cdef int i, j * for i in range(ndim / 2): # <<<<<<<<<<<<<< @@ -17667,7 +17730,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_4; __pyx_t_1+=1) { __pyx_v_i = __pyx_t_1; - /* "View.MemoryView":952 + /* "View.MemoryView":954 * cdef int i, j * for i in range(ndim / 2): * j = ndim - 1 - i # <<<<<<<<<<<<<< @@ -17676,7 +17739,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { */ __pyx_v_j = ((__pyx_v_ndim - 1) - __pyx_v_i); - /* "View.MemoryView":953 + /* "View.MemoryView":955 * for i in range(ndim / 2): * j = ndim - 1 - i * strides[i], strides[j] = strides[j], strides[i] # <<<<<<<<<<<<<< @@ -17688,7 +17751,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { (__pyx_v_strides[__pyx_v_i]) = __pyx_t_5; (__pyx_v_strides[__pyx_v_j]) = __pyx_t_6; - /* "View.MemoryView":954 + /* "View.MemoryView":956 * j = ndim - 1 - i * strides[i], strides[j] = strides[j], strides[i] * shape[i], shape[j] = shape[j], shape[i] # <<<<<<<<<<<<<< @@ -17700,7 +17763,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { (__pyx_v_shape[__pyx_v_i]) = __pyx_t_6; (__pyx_v_shape[__pyx_v_j]) = __pyx_t_5; - /* "View.MemoryView":956 + /* "View.MemoryView":958 * shape[i], shape[j] = shape[j], shape[i] * * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: # <<<<<<<<<<<<<< @@ -17718,16 +17781,16 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { __pyx_L6_bool_binop_done:; if (__pyx_t_7) { - /* "View.MemoryView":957 + /* "View.MemoryView":959 * * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: * _err(ValueError, "Cannot transpose memoryview with indirect dimensions") # <<<<<<<<<<<<<< * * return 1 */ - __pyx_t_9 = __pyx_memoryview_err(__pyx_builtin_ValueError, ((char *)"Cannot transpose memoryview with indirect dimensions")); if (unlikely(__pyx_t_9 == 
((int)-1))) __PYX_ERR(1, 957, __pyx_L1_error) + __pyx_t_9 = __pyx_memoryview_err(__pyx_builtin_ValueError, ((char *)"Cannot transpose memoryview with indirect dimensions")); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(1, 959, __pyx_L1_error) - /* "View.MemoryView":956 + /* "View.MemoryView":958 * shape[i], shape[j] = shape[j], shape[i] * * if memslice.suboffsets[i] >= 0 or memslice.suboffsets[j] >= 0: # <<<<<<<<<<<<<< @@ -17737,7 +17800,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { } } - /* "View.MemoryView":959 + /* "View.MemoryView":961 * _err(ValueError, "Cannot transpose memoryview with indirect dimensions") * * return 1 # <<<<<<<<<<<<<< @@ -17747,7 +17810,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { __pyx_r = 1; goto __pyx_L0; - /* "View.MemoryView":943 + /* "View.MemoryView":945 * * @cname('__pyx_memslice_transpose') * cdef int transpose_memslice(__Pyx_memviewslice *memslice) nogil except 0: # <<<<<<<<<<<<<< @@ -17771,7 +17834,7 @@ static int __pyx_memslice_transpose(__Pyx_memviewslice *__pyx_v_memslice) { return __pyx_r; } -/* "View.MemoryView":976 +/* "View.MemoryView":978 * cdef int (*to_dtype_func)(char *, object) except 0 * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -17794,7 +17857,7 @@ static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewsl __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__dealloc__", 0); - /* "View.MemoryView":977 + /* "View.MemoryView":979 * * def __dealloc__(self): * __PYX_XDEC_MEMVIEW(&self.from_slice, 1) # <<<<<<<<<<<<<< @@ -17803,7 +17866,7 @@ static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewsl */ __PYX_XDEC_MEMVIEW((&__pyx_v_self->from_slice), 1); - /* "View.MemoryView":976 + /* "View.MemoryView":978 * cdef int (*to_dtype_func)(char *, object) except 0 * * def __dealloc__(self): # <<<<<<<<<<<<<< @@ -17815,7 +17878,7 @@ static void 
__pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewsl __Pyx_RefNannyFinishContext(); } -/* "View.MemoryView":979 +/* "View.MemoryView":981 * __PYX_XDEC_MEMVIEW(&self.from_slice, 1) * * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< @@ -17833,7 +17896,7 @@ static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor int __pyx_clineno = 0; __Pyx_RefNannySetupContext("convert_item_to_object", 0); - /* "View.MemoryView":980 + /* "View.MemoryView":982 * * cdef convert_item_to_object(self, char *itemp): * if self.to_object_func != NULL: # <<<<<<<<<<<<<< @@ -17843,7 +17906,7 @@ static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor __pyx_t_1 = ((__pyx_v_self->to_object_func != NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":981 + /* "View.MemoryView":983 * cdef convert_item_to_object(self, char *itemp): * if self.to_object_func != NULL: * return self.to_object_func(itemp) # <<<<<<<<<<<<<< @@ -17851,13 +17914,13 @@ static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor * return memoryview.convert_item_to_object(self, itemp) */ __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_v_self->to_object_func(__pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 981, __pyx_L1_error) + __pyx_t_2 = __pyx_v_self->to_object_func(__pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 983, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; - /* "View.MemoryView":980 + /* "View.MemoryView":982 * * cdef convert_item_to_object(self, char *itemp): * if self.to_object_func != NULL: # <<<<<<<<<<<<<< @@ -17866,7 +17929,7 @@ static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor */ } - /* "View.MemoryView":983 + /* "View.MemoryView":985 * return self.to_object_func(itemp) * else: * return memoryview.convert_item_to_object(self, itemp) # <<<<<<<<<<<<<< @@ -17875,14 +17938,14 @@ static PyObject 
*__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor */ /*else*/ { __Pyx_XDECREF(__pyx_r); - __pyx_t_2 = __pyx_memoryview_convert_item_to_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 983, __pyx_L1_error) + __pyx_t_2 = __pyx_memoryview_convert_item_to_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 985, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0; } - /* "View.MemoryView":979 + /* "View.MemoryView":981 * __PYX_XDEC_MEMVIEW(&self.from_slice, 1) * * cdef convert_item_to_object(self, char *itemp): # <<<<<<<<<<<<<< @@ -17901,7 +17964,7 @@ static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memor return __pyx_r; } -/* "View.MemoryView":985 +/* "View.MemoryView":987 * return memoryview.convert_item_to_object(self, itemp) * * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< @@ -17920,7 +17983,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo int __pyx_clineno = 0; __Pyx_RefNannySetupContext("assign_item_from_object", 0); - /* "View.MemoryView":986 + /* "View.MemoryView":988 * * cdef assign_item_from_object(self, char *itemp, object value): * if self.to_dtype_func != NULL: # <<<<<<<<<<<<<< @@ -17930,16 +17993,16 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo __pyx_t_1 = ((__pyx_v_self->to_dtype_func != NULL) != 0); if (__pyx_t_1) { - /* "View.MemoryView":987 + /* "View.MemoryView":989 * cdef assign_item_from_object(self, char *itemp, object value): * if self.to_dtype_func != NULL: * self.to_dtype_func(itemp, value) # <<<<<<<<<<<<<< * else: * memoryview.assign_item_from_object(self, itemp, value) */ - __pyx_t_2 = __pyx_v_self->to_dtype_func(__pyx_v_itemp, __pyx_v_value); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(1, 987, __pyx_L1_error) + __pyx_t_2 = 
__pyx_v_self->to_dtype_func(__pyx_v_itemp, __pyx_v_value); if (unlikely(__pyx_t_2 == ((int)0))) __PYX_ERR(1, 989, __pyx_L1_error) - /* "View.MemoryView":986 + /* "View.MemoryView":988 * * cdef assign_item_from_object(self, char *itemp, object value): * if self.to_dtype_func != NULL: # <<<<<<<<<<<<<< @@ -17949,7 +18012,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo goto __pyx_L3; } - /* "View.MemoryView":989 + /* "View.MemoryView":991 * self.to_dtype_func(itemp, value) * else: * memoryview.assign_item_from_object(self, itemp, value) # <<<<<<<<<<<<<< @@ -17957,13 +18020,13 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo * @property */ /*else*/ { - __pyx_t_3 = __pyx_memoryview_assign_item_from_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 989, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_assign_item_from_object(((struct __pyx_memoryview_obj *)__pyx_v_self), __pyx_v_itemp, __pyx_v_value); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 991, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } __pyx_L3:; - /* "View.MemoryView":985 + /* "View.MemoryView":987 * return memoryview.convert_item_to_object(self, itemp) * * cdef assign_item_from_object(self, char *itemp, object value): # <<<<<<<<<<<<<< @@ -17984,7 +18047,7 @@ static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memo return __pyx_r; } -/* "View.MemoryView":992 +/* "View.MemoryView":994 * * @property * def base(self): # <<<<<<<<<<<<<< @@ -18010,7 +18073,7 @@ static PyObject *__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__ __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__get__", 0); - /* "View.MemoryView":993 + /* "View.MemoryView":995 * @property * def base(self): * return self.from_object # <<<<<<<<<<<<<< @@ -18022,7 +18085,7 @@ static PyObject 
*__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__ __pyx_r = __pyx_v_self->from_object; goto __pyx_L0; - /* "View.MemoryView":992 + /* "View.MemoryView":994 * * @property * def base(self): # <<<<<<<<<<<<<< @@ -18150,7 +18213,7 @@ static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUS return __pyx_r; } -/* "View.MemoryView":999 +/* "View.MemoryView":1001 * * @cname('__pyx_memoryview_fromslice') * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice, # <<<<<<<<<<<<<< @@ -18178,7 +18241,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl int __pyx_clineno = 0; __Pyx_RefNannySetupContext("memoryview_fromslice", 0); - /* "View.MemoryView":1007 + /* "View.MemoryView":1009 * cdef _memoryviewslice result * * if memviewslice.memview == Py_None: # <<<<<<<<<<<<<< @@ -18188,7 +18251,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_1 = ((((PyObject *)__pyx_v_memviewslice.memview) == Py_None) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1008 + /* "View.MemoryView":1010 * * if memviewslice.memview == Py_None: * return None # <<<<<<<<<<<<<< @@ -18199,7 +18262,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; - /* "View.MemoryView":1007 + /* "View.MemoryView":1009 * cdef _memoryviewslice result * * if memviewslice.memview == Py_None: # <<<<<<<<<<<<<< @@ -18208,16 +18271,16 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ } - /* "View.MemoryView":1013 + /* "View.MemoryView":1015 * * * result = _memoryviewslice(None, 0, dtype_is_object) # <<<<<<<<<<<<<< * * result.from_slice = memviewslice */ - __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1013, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_dtype_is_object); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1015, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1013, __pyx_L1_error) + __pyx_t_3 = PyTuple_New(3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1015, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); @@ -18228,13 +18291,13 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_3, 2, __pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryviewslice_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1013, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(((PyObject *)__pyx_memoryviewslice_type), __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1015, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_result = ((struct __pyx_memoryviewslice_obj *)__pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":1015 + /* "View.MemoryView":1017 * result = _memoryviewslice(None, 0, dtype_is_object) * * result.from_slice = memviewslice # <<<<<<<<<<<<<< @@ -18243,7 +18306,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->from_slice = __pyx_v_memviewslice; - /* "View.MemoryView":1016 + /* "View.MemoryView":1018 * * result.from_slice = memviewslice * __PYX_INC_MEMVIEW(&memviewslice, 1) # <<<<<<<<<<<<<< @@ -18252,14 +18315,14 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __PYX_INC_MEMVIEW((&__pyx_v_memviewslice), 1); - /* "View.MemoryView":1018 + /* "View.MemoryView":1020 * __PYX_INC_MEMVIEW(&memviewslice, 1) * * result.from_object = ( memviewslice.memview).base # <<<<<<<<<<<<<< * result.typeinfo = memviewslice.memview.typeinfo * */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_memviewslice.memview), __pyx_n_s_base); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1018, __pyx_L1_error) + __pyx_t_2 = 
__Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_memviewslice.memview), __pyx_n_s_base); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1020, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); __Pyx_GOTREF(__pyx_v_result->from_object); @@ -18267,7 +18330,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_v_result->from_object = __pyx_t_2; __pyx_t_2 = 0; - /* "View.MemoryView":1019 + /* "View.MemoryView":1021 * * result.from_object = ( memviewslice.memview).base * result.typeinfo = memviewslice.memview.typeinfo # <<<<<<<<<<<<<< @@ -18277,7 +18340,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_4 = __pyx_v_memviewslice.memview->typeinfo; __pyx_v_result->__pyx_base.typeinfo = __pyx_t_4; - /* "View.MemoryView":1021 + /* "View.MemoryView":1023 * result.typeinfo = memviewslice.memview.typeinfo * * result.view = memviewslice.memview.view # <<<<<<<<<<<<<< @@ -18287,7 +18350,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_5 = __pyx_v_memviewslice.memview->view; __pyx_v_result->__pyx_base.view = __pyx_t_5; - /* "View.MemoryView":1022 + /* "View.MemoryView":1024 * * result.view = memviewslice.memview.view * result.view.buf = memviewslice.data # <<<<<<<<<<<<<< @@ -18296,7 +18359,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.view.buf = ((void *)__pyx_v_memviewslice.data); - /* "View.MemoryView":1023 + /* "View.MemoryView":1025 * result.view = memviewslice.memview.view * result.view.buf = memviewslice.data * result.view.ndim = ndim # <<<<<<<<<<<<<< @@ -18305,7 +18368,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.view.ndim = __pyx_v_ndim; - /* "View.MemoryView":1024 + /* "View.MemoryView":1026 * result.view.buf = memviewslice.data * result.view.ndim = ndim * (<__pyx_buffer *> &result.view).obj = Py_None # 
<<<<<<<<<<<<<< @@ -18314,7 +18377,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ ((Py_buffer *)(&__pyx_v_result->__pyx_base.view))->obj = Py_None; - /* "View.MemoryView":1025 + /* "View.MemoryView":1027 * result.view.ndim = ndim * (<__pyx_buffer *> &result.view).obj = Py_None * Py_INCREF(Py_None) # <<<<<<<<<<<<<< @@ -18323,7 +18386,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ Py_INCREF(Py_None); - /* "View.MemoryView":1027 + /* "View.MemoryView":1029 * Py_INCREF(Py_None) * * if (memviewslice.memview).flags & PyBUF_WRITABLE: # <<<<<<<<<<<<<< @@ -18333,7 +18396,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_1 = ((((struct __pyx_memoryview_obj *)__pyx_v_memviewslice.memview)->flags & PyBUF_WRITABLE) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1028 + /* "View.MemoryView":1030 * * if (memviewslice.memview).flags & PyBUF_WRITABLE: * result.flags = PyBUF_RECORDS # <<<<<<<<<<<<<< @@ -18342,7 +18405,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.flags = PyBUF_RECORDS; - /* "View.MemoryView":1027 + /* "View.MemoryView":1029 * Py_INCREF(Py_None) * * if (memviewslice.memview).flags & PyBUF_WRITABLE: # <<<<<<<<<<<<<< @@ -18352,7 +18415,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl goto __pyx_L4; } - /* "View.MemoryView":1030 + /* "View.MemoryView":1032 * result.flags = PyBUF_RECORDS * else: * result.flags = PyBUF_RECORDS_RO # <<<<<<<<<<<<<< @@ -18364,7 +18427,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl } __pyx_L4:; - /* "View.MemoryView":1032 + /* "View.MemoryView":1034 * result.flags = PyBUF_RECORDS_RO * * result.view.shape = result.from_slice.shape # <<<<<<<<<<<<<< @@ -18373,7 +18436,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ 
__pyx_v_result->__pyx_base.view.shape = ((Py_ssize_t *)__pyx_v_result->from_slice.shape); - /* "View.MemoryView":1033 + /* "View.MemoryView":1035 * * result.view.shape = result.from_slice.shape * result.view.strides = result.from_slice.strides # <<<<<<<<<<<<<< @@ -18382,7 +18445,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.view.strides = ((Py_ssize_t *)__pyx_v_result->from_slice.strides); - /* "View.MemoryView":1036 + /* "View.MemoryView":1038 * * * result.view.suboffsets = NULL # <<<<<<<<<<<<<< @@ -18391,7 +18454,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.view.suboffsets = NULL; - /* "View.MemoryView":1037 + /* "View.MemoryView":1039 * * result.view.suboffsets = NULL * for suboffset in result.from_slice.suboffsets[:ndim]: # <<<<<<<<<<<<<< @@ -18403,7 +18466,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_6 = __pyx_t_8; __pyx_v_suboffset = (__pyx_t_6[0]); - /* "View.MemoryView":1038 + /* "View.MemoryView":1040 * result.view.suboffsets = NULL * for suboffset in result.from_slice.suboffsets[:ndim]: * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -18413,7 +18476,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_1 = ((__pyx_v_suboffset >= 0) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1039 + /* "View.MemoryView":1041 * for suboffset in result.from_slice.suboffsets[:ndim]: * if suboffset >= 0: * result.view.suboffsets = result.from_slice.suboffsets # <<<<<<<<<<<<<< @@ -18422,7 +18485,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->__pyx_base.view.suboffsets = ((Py_ssize_t *)__pyx_v_result->from_slice.suboffsets); - /* "View.MemoryView":1040 + /* "View.MemoryView":1042 * if suboffset >= 0: * result.view.suboffsets = result.from_slice.suboffsets * break # <<<<<<<<<<<<<< @@ -18431,7 +18494,7 
@@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ goto __pyx_L6_break; - /* "View.MemoryView":1038 + /* "View.MemoryView":1040 * result.view.suboffsets = NULL * for suboffset in result.from_slice.suboffsets[:ndim]: * if suboffset >= 0: # <<<<<<<<<<<<<< @@ -18442,7 +18505,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl } __pyx_L6_break:; - /* "View.MemoryView":1042 + /* "View.MemoryView":1044 * break * * result.view.len = result.view.itemsize # <<<<<<<<<<<<<< @@ -18452,7 +18515,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_9 = __pyx_v_result->__pyx_base.view.itemsize; __pyx_v_result->__pyx_base.view.len = __pyx_t_9; - /* "View.MemoryView":1043 + /* "View.MemoryView":1045 * * result.view.len = result.view.itemsize * for length in result.view.shape[:ndim]: # <<<<<<<<<<<<<< @@ -18462,29 +18525,29 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_t_7 = (__pyx_v_result->__pyx_base.view.shape + __pyx_v_ndim); for (__pyx_t_8 = __pyx_v_result->__pyx_base.view.shape; __pyx_t_8 < __pyx_t_7; __pyx_t_8++) { __pyx_t_6 = __pyx_t_8; - __pyx_t_2 = PyInt_FromSsize_t((__pyx_t_6[0])); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1043, __pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t((__pyx_t_6[0])); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1045, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_XDECREF_SET(__pyx_v_length, __pyx_t_2); __pyx_t_2 = 0; - /* "View.MemoryView":1044 + /* "View.MemoryView":1046 * result.view.len = result.view.itemsize * for length in result.view.shape[:ndim]: * result.view.len *= length # <<<<<<<<<<<<<< * * result.to_object_func = to_object_func */ - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_result->__pyx_base.view.len); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1044, __pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_result->__pyx_base.view.len); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1046, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyNumber_InPlaceMultiply(__pyx_t_2, __pyx_v_length); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1044, __pyx_L1_error) + __pyx_t_3 = PyNumber_InPlaceMultiply(__pyx_t_2, __pyx_v_length); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1046, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 1044, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(1, 1046, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_result->__pyx_base.view.len = __pyx_t_9; } - /* "View.MemoryView":1046 + /* "View.MemoryView":1048 * result.view.len *= length * * result.to_object_func = to_object_func # <<<<<<<<<<<<<< @@ -18493,7 +18556,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->to_object_func = __pyx_v_to_object_func; - /* "View.MemoryView":1047 + /* "View.MemoryView":1049 * * result.to_object_func = to_object_func * result.to_dtype_func = to_dtype_func # <<<<<<<<<<<<<< @@ -18502,7 +18565,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl */ __pyx_v_result->to_dtype_func = __pyx_v_to_dtype_func; - /* "View.MemoryView":1049 + /* "View.MemoryView":1051 * result.to_dtype_func = to_dtype_func * * return result # <<<<<<<<<<<<<< @@ -18514,7 +18577,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl __pyx_r = ((PyObject *)__pyx_v_result); goto __pyx_L0; - /* "View.MemoryView":999 + /* "View.MemoryView":1001 * * @cname('__pyx_memoryview_fromslice') * cdef memoryview_fromslice(__Pyx_memviewslice memviewslice, # <<<<<<<<<<<<<< @@ -18536,7 +18599,7 @@ static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice __pyx_v_memviewsl return __pyx_r; } -/* "View.MemoryView":1052 +/* 
"View.MemoryView":1054 * * @cname('__pyx_memoryview_get_slice_from_memoryview') * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview, # <<<<<<<<<<<<<< @@ -18556,7 +18619,7 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p int __pyx_clineno = 0; __Pyx_RefNannySetupContext("get_slice_from_memview", 0); - /* "View.MemoryView":1055 + /* "View.MemoryView":1057 * __Pyx_memviewslice *mslice) except NULL: * cdef _memoryviewslice obj * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -18567,20 +18630,20 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":1056 + /* "View.MemoryView":1058 * cdef _memoryviewslice obj * if isinstance(memview, _memoryviewslice): * obj = memview # <<<<<<<<<<<<<< * return &obj.from_slice * else: */ - if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 1056, __pyx_L1_error) + if (!(likely(((((PyObject *)__pyx_v_memview)) == Py_None) || likely(__Pyx_TypeTest(((PyObject *)__pyx_v_memview), __pyx_memoryviewslice_type))))) __PYX_ERR(1, 1058, __pyx_L1_error) __pyx_t_3 = ((PyObject *)__pyx_v_memview); __Pyx_INCREF(__pyx_t_3); __pyx_v_obj = ((struct __pyx_memoryviewslice_obj *)__pyx_t_3); __pyx_t_3 = 0; - /* "View.MemoryView":1057 + /* "View.MemoryView":1059 * if isinstance(memview, _memoryviewslice): * obj = memview * return &obj.from_slice # <<<<<<<<<<<<<< @@ -18590,7 +18653,7 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p __pyx_r = (&__pyx_v_obj->from_slice); goto __pyx_L0; - /* "View.MemoryView":1055 + /* "View.MemoryView":1057 * __Pyx_memviewslice *mslice) except NULL: * cdef _memoryviewslice obj * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -18599,7 +18662,7 @@ static __Pyx_memviewslice 
*__pyx_memoryview_get_slice_from_memoryview(struct __p */ } - /* "View.MemoryView":1059 + /* "View.MemoryView":1061 * return &obj.from_slice * else: * slice_copy(memview, mslice) # <<<<<<<<<<<<<< @@ -18609,7 +18672,7 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p /*else*/ { __pyx_memoryview_slice_copy(__pyx_v_memview, __pyx_v_mslice); - /* "View.MemoryView":1060 + /* "View.MemoryView":1062 * else: * slice_copy(memview, mslice) * return mslice # <<<<<<<<<<<<<< @@ -18620,7 +18683,7 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p goto __pyx_L0; } - /* "View.MemoryView":1052 + /* "View.MemoryView":1054 * * @cname('__pyx_memoryview_get_slice_from_memoryview') * cdef __Pyx_memviewslice *get_slice_from_memview(memoryview memview, # <<<<<<<<<<<<<< @@ -18639,7 +18702,7 @@ static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __p return __pyx_r; } -/* "View.MemoryView":1063 +/* "View.MemoryView":1065 * * @cname('__pyx_memoryview_slice_copy') * cdef void slice_copy(memoryview memview, __Pyx_memviewslice *dst): # <<<<<<<<<<<<<< @@ -18660,7 +18723,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem Py_ssize_t __pyx_t_5; __Pyx_RefNannySetupContext("slice_copy", 0); - /* "View.MemoryView":1067 + /* "View.MemoryView":1069 * cdef (Py_ssize_t*) shape, strides, suboffsets * * shape = memview.view.shape # <<<<<<<<<<<<<< @@ -18670,7 +18733,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem __pyx_t_1 = __pyx_v_memview->view.shape; __pyx_v_shape = __pyx_t_1; - /* "View.MemoryView":1068 + /* "View.MemoryView":1070 * * shape = memview.view.shape * strides = memview.view.strides # <<<<<<<<<<<<<< @@ -18680,7 +18743,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem __pyx_t_1 = __pyx_v_memview->view.strides; __pyx_v_strides = __pyx_t_1; - /* "View.MemoryView":1069 + /* "View.MemoryView":1071 * 
shape = memview.view.shape * strides = memview.view.strides * suboffsets = memview.view.suboffsets # <<<<<<<<<<<<<< @@ -18690,7 +18753,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem __pyx_t_1 = __pyx_v_memview->view.suboffsets; __pyx_v_suboffsets = __pyx_t_1; - /* "View.MemoryView":1071 + /* "View.MemoryView":1073 * suboffsets = memview.view.suboffsets * * dst.memview = <__pyx_memoryview *> memview # <<<<<<<<<<<<<< @@ -18699,7 +18762,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem */ __pyx_v_dst->memview = ((struct __pyx_memoryview_obj *)__pyx_v_memview); - /* "View.MemoryView":1072 + /* "View.MemoryView":1074 * * dst.memview = <__pyx_memoryview *> memview * dst.data = memview.view.buf # <<<<<<<<<<<<<< @@ -18708,7 +18771,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem */ __pyx_v_dst->data = ((char *)__pyx_v_memview->view.buf); - /* "View.MemoryView":1074 + /* "View.MemoryView":1076 * dst.data = memview.view.buf * * for dim in range(memview.view.ndim): # <<<<<<<<<<<<<< @@ -18720,7 +18783,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_dim = __pyx_t_4; - /* "View.MemoryView":1075 + /* "View.MemoryView":1077 * * for dim in range(memview.view.ndim): * dst.shape[dim] = shape[dim] # <<<<<<<<<<<<<< @@ -18729,7 +18792,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem */ (__pyx_v_dst->shape[__pyx_v_dim]) = (__pyx_v_shape[__pyx_v_dim]); - /* "View.MemoryView":1076 + /* "View.MemoryView":1078 * for dim in range(memview.view.ndim): * dst.shape[dim] = shape[dim] * dst.strides[dim] = strides[dim] # <<<<<<<<<<<<<< @@ -18738,7 +18801,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem */ (__pyx_v_dst->strides[__pyx_v_dim]) = (__pyx_v_strides[__pyx_v_dim]); - /* "View.MemoryView":1077 + /* 
"View.MemoryView":1079 * dst.shape[dim] = shape[dim] * dst.strides[dim] = strides[dim] * dst.suboffsets[dim] = suboffsets[dim] if suboffsets else -1 # <<<<<<<<<<<<<< @@ -18753,7 +18816,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem (__pyx_v_dst->suboffsets[__pyx_v_dim]) = __pyx_t_5; } - /* "View.MemoryView":1063 + /* "View.MemoryView":1065 * * @cname('__pyx_memoryview_slice_copy') * cdef void slice_copy(memoryview memview, __Pyx_memviewslice *dst): # <<<<<<<<<<<<<< @@ -18765,7 +18828,7 @@ static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *__pyx_v_mem __Pyx_RefNannyFinishContext(); } -/* "View.MemoryView":1080 +/* "View.MemoryView":1082 * * @cname('__pyx_memoryview_copy_object') * cdef memoryview_copy(memoryview memview): # <<<<<<<<<<<<<< @@ -18783,7 +18846,7 @@ static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("memoryview_copy", 0); - /* "View.MemoryView":1083 + /* "View.MemoryView":1085 * "Create a new memoryview object" * cdef __Pyx_memviewslice memviewslice * slice_copy(memview, &memviewslice) # <<<<<<<<<<<<<< @@ -18792,7 +18855,7 @@ static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx */ __pyx_memoryview_slice_copy(__pyx_v_memview, (&__pyx_v_memviewslice)); - /* "View.MemoryView":1084 + /* "View.MemoryView":1086 * cdef __Pyx_memviewslice memviewslice * slice_copy(memview, &memviewslice) * return memoryview_copy_from_slice(memview, &memviewslice) # <<<<<<<<<<<<<< @@ -18800,13 +18863,13 @@ static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx * @cname('__pyx_memoryview_copy_object_from_slice') */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __pyx_memoryview_copy_object_from_slice(__pyx_v_memview, (&__pyx_v_memviewslice)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1084, __pyx_L1_error) + __pyx_t_1 = __pyx_memoryview_copy_object_from_slice(__pyx_v_memview, (&__pyx_v_memviewslice)); if 
(unlikely(!__pyx_t_1)) __PYX_ERR(1, 1086, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "View.MemoryView":1080 + /* "View.MemoryView":1082 * * @cname('__pyx_memoryview_copy_object') * cdef memoryview_copy(memoryview memview): # <<<<<<<<<<<<<< @@ -18825,7 +18888,7 @@ static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *__pyx return __pyx_r; } -/* "View.MemoryView":1087 +/* "View.MemoryView":1089 * * @cname('__pyx_memoryview_copy_object_from_slice') * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice): # <<<<<<<<<<<<<< @@ -18848,7 +18911,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview int __pyx_clineno = 0; __Pyx_RefNannySetupContext("memoryview_copy_from_slice", 0); - /* "View.MemoryView":1094 + /* "View.MemoryView":1096 * cdef int (*to_dtype_func)(char *, object) except 0 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -18859,7 +18922,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "View.MemoryView":1095 + /* "View.MemoryView":1097 * * if isinstance(memview, _memoryviewslice): * to_object_func = (<_memoryviewslice> memview).to_object_func # <<<<<<<<<<<<<< @@ -18869,7 +18932,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview __pyx_t_3 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_object_func; __pyx_v_to_object_func = __pyx_t_3; - /* "View.MemoryView":1096 + /* "View.MemoryView":1098 * if isinstance(memview, _memoryviewslice): * to_object_func = (<_memoryviewslice> memview).to_object_func * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func # <<<<<<<<<<<<<< @@ -18879,7 +18942,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview __pyx_t_4 = ((struct __pyx_memoryviewslice_obj *)__pyx_v_memview)->to_dtype_func; __pyx_v_to_dtype_func 
= __pyx_t_4; - /* "View.MemoryView":1094 + /* "View.MemoryView":1096 * cdef int (*to_dtype_func)(char *, object) except 0 * * if isinstance(memview, _memoryviewslice): # <<<<<<<<<<<<<< @@ -18889,7 +18952,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview goto __pyx_L3; } - /* "View.MemoryView":1098 + /* "View.MemoryView":1100 * to_dtype_func = (<_memoryviewslice> memview).to_dtype_func * else: * to_object_func = NULL # <<<<<<<<<<<<<< @@ -18899,7 +18962,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview /*else*/ { __pyx_v_to_object_func = NULL; - /* "View.MemoryView":1099 + /* "View.MemoryView":1101 * else: * to_object_func = NULL * to_dtype_func = NULL # <<<<<<<<<<<<<< @@ -18910,7 +18973,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview } __pyx_L3:; - /* "View.MemoryView":1101 + /* "View.MemoryView":1103 * to_dtype_func = NULL * * return memoryview_fromslice(memviewslice[0], memview.view.ndim, # <<<<<<<<<<<<<< @@ -18919,20 +18982,20 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview */ __Pyx_XDECREF(__pyx_r); - /* "View.MemoryView":1103 + /* "View.MemoryView":1105 * return memoryview_fromslice(memviewslice[0], memview.view.ndim, * to_object_func, to_dtype_func, * memview.dtype_is_object) # <<<<<<<<<<<<<< * * */ - __pyx_t_5 = __pyx_memoryview_fromslice((__pyx_v_memviewslice[0]), __pyx_v_memview->view.ndim, __pyx_v_to_object_func, __pyx_v_to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 1101, __pyx_L1_error) + __pyx_t_5 = __pyx_memoryview_fromslice((__pyx_v_memviewslice[0]), __pyx_v_memview->view.ndim, __pyx_v_to_object_func, __pyx_v_to_dtype_func, __pyx_v_memview->dtype_is_object); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 1103, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_r = __pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "View.MemoryView":1087 + /* "View.MemoryView":1089 * * 
@cname('__pyx_memoryview_copy_object_from_slice') * cdef memoryview_copy_from_slice(memoryview memview, __Pyx_memviewslice *memviewslice): # <<<<<<<<<<<<<< @@ -18951,7 +19014,7 @@ static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview return __pyx_r; } -/* "View.MemoryView":1109 +/* "View.MemoryView":1111 * * * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil: # <<<<<<<<<<<<<< @@ -18963,7 +19026,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { Py_ssize_t __pyx_r; int __pyx_t_1; - /* "View.MemoryView":1110 + /* "View.MemoryView":1112 * * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil: * if arg < 0: # <<<<<<<<<<<<<< @@ -18973,7 +19036,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { __pyx_t_1 = ((__pyx_v_arg < 0) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1111 + /* "View.MemoryView":1113 * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil: * if arg < 0: * return -arg # <<<<<<<<<<<<<< @@ -18983,7 +19046,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { __pyx_r = (-__pyx_v_arg); goto __pyx_L0; - /* "View.MemoryView":1110 + /* "View.MemoryView":1112 * * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil: * if arg < 0: # <<<<<<<<<<<<<< @@ -18992,7 +19055,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { */ } - /* "View.MemoryView":1113 + /* "View.MemoryView":1115 * return -arg * else: * return arg # <<<<<<<<<<<<<< @@ -19004,7 +19067,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { goto __pyx_L0; } - /* "View.MemoryView":1109 + /* "View.MemoryView":1111 * * * cdef Py_ssize_t abs_py_ssize_t(Py_ssize_t arg) nogil: # <<<<<<<<<<<<<< @@ -19017,7 +19080,7 @@ static Py_ssize_t abs_py_ssize_t(Py_ssize_t __pyx_v_arg) { return __pyx_r; } -/* "View.MemoryView":1116 +/* "View.MemoryView":1118 * * @cname('__pyx_get_best_slice_order') * cdef char get_best_order(__Pyx_memviewslice *mslice, int ndim) nogil: # <<<<<<<<<<<<<< @@ -19035,7 +19098,7 @@ static char 
__pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ int __pyx_t_3; int __pyx_t_4; - /* "View.MemoryView":1121 + /* "View.MemoryView":1123 * """ * cdef int i * cdef Py_ssize_t c_stride = 0 # <<<<<<<<<<<<<< @@ -19044,7 +19107,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ __pyx_v_c_stride = 0; - /* "View.MemoryView":1122 + /* "View.MemoryView":1124 * cdef int i * cdef Py_ssize_t c_stride = 0 * cdef Py_ssize_t f_stride = 0 # <<<<<<<<<<<<<< @@ -19053,7 +19116,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ __pyx_v_f_stride = 0; - /* "View.MemoryView":1124 + /* "View.MemoryView":1126 * cdef Py_ssize_t f_stride = 0 * * for i in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< @@ -19063,7 +19126,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ for (__pyx_t_1 = (__pyx_v_ndim - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { __pyx_v_i = __pyx_t_1; - /* "View.MemoryView":1125 + /* "View.MemoryView":1127 * * for i in range(ndim - 1, -1, -1): * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< @@ -19073,7 +19136,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ __pyx_t_2 = (((__pyx_v_mslice->shape[__pyx_v_i]) > 1) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1126 + /* "View.MemoryView":1128 * for i in range(ndim - 1, -1, -1): * if mslice.shape[i] > 1: * c_stride = mslice.strides[i] # <<<<<<<<<<<<<< @@ -19082,7 +19145,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ __pyx_v_c_stride = (__pyx_v_mslice->strides[__pyx_v_i]); - /* "View.MemoryView":1127 + /* "View.MemoryView":1129 * if mslice.shape[i] > 1: * c_stride = mslice.strides[i] * break # <<<<<<<<<<<<<< @@ -19091,7 +19154,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ goto __pyx_L4_break; - /* "View.MemoryView":1125 + /* "View.MemoryView":1127 * * for i in range(ndim - 1, -1, -1): * if 
mslice.shape[i] > 1: # <<<<<<<<<<<<<< @@ -19102,7 +19165,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ } __pyx_L4_break:; - /* "View.MemoryView":1129 + /* "View.MemoryView":1131 * break * * for i in range(ndim): # <<<<<<<<<<<<<< @@ -19114,7 +19177,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_i = __pyx_t_4; - /* "View.MemoryView":1130 + /* "View.MemoryView":1132 * * for i in range(ndim): * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< @@ -19124,7 +19187,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ __pyx_t_2 = (((__pyx_v_mslice->shape[__pyx_v_i]) > 1) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1131 + /* "View.MemoryView":1133 * for i in range(ndim): * if mslice.shape[i] > 1: * f_stride = mslice.strides[i] # <<<<<<<<<<<<<< @@ -19133,7 +19196,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ __pyx_v_f_stride = (__pyx_v_mslice->strides[__pyx_v_i]); - /* "View.MemoryView":1132 + /* "View.MemoryView":1134 * if mslice.shape[i] > 1: * f_stride = mslice.strides[i] * break # <<<<<<<<<<<<<< @@ -19142,7 +19205,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ goto __pyx_L7_break; - /* "View.MemoryView":1130 + /* "View.MemoryView":1132 * * for i in range(ndim): * if mslice.shape[i] > 1: # <<<<<<<<<<<<<< @@ -19153,7 +19216,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ } __pyx_L7_break:; - /* "View.MemoryView":1134 + /* "View.MemoryView":1136 * break * * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): # <<<<<<<<<<<<<< @@ -19163,7 +19226,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ __pyx_t_2 = ((abs_py_ssize_t(__pyx_v_c_stride) <= abs_py_ssize_t(__pyx_v_f_stride)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1135 + /* 
"View.MemoryView":1137 * * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): * return 'C' # <<<<<<<<<<<<<< @@ -19173,7 +19236,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ __pyx_r = 'C'; goto __pyx_L0; - /* "View.MemoryView":1134 + /* "View.MemoryView":1136 * break * * if abs_py_ssize_t(c_stride) <= abs_py_ssize_t(f_stride): # <<<<<<<<<<<<<< @@ -19182,7 +19245,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ */ } - /* "View.MemoryView":1137 + /* "View.MemoryView":1139 * return 'C' * else: * return 'F' # <<<<<<<<<<<<<< @@ -19194,7 +19257,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ goto __pyx_L0; } - /* "View.MemoryView":1116 + /* "View.MemoryView":1118 * * @cname('__pyx_get_best_slice_order') * cdef char get_best_order(__Pyx_memviewslice *mslice, int ndim) nogil: # <<<<<<<<<<<<<< @@ -19207,7 +19270,7 @@ static char __pyx_get_best_slice_order(__Pyx_memviewslice *__pyx_v_mslice, int _ return __pyx_r; } -/* "View.MemoryView":1140 +/* "View.MemoryView":1142 * * @cython.cdivision(True) * cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides, # <<<<<<<<<<<<<< @@ -19228,7 +19291,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v Py_ssize_t __pyx_t_5; Py_ssize_t __pyx_t_6; - /* "View.MemoryView":1147 + /* "View.MemoryView":1149 * * cdef Py_ssize_t i * cdef Py_ssize_t src_extent = src_shape[0] # <<<<<<<<<<<<<< @@ -19237,7 +19300,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_src_extent = (__pyx_v_src_shape[0]); - /* "View.MemoryView":1148 + /* "View.MemoryView":1150 * cdef Py_ssize_t i * cdef Py_ssize_t src_extent = src_shape[0] * cdef Py_ssize_t dst_extent = dst_shape[0] # <<<<<<<<<<<<<< @@ -19246,7 +19309,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_dst_extent = (__pyx_v_dst_shape[0]); - /* 
"View.MemoryView":1149 + /* "View.MemoryView":1151 * cdef Py_ssize_t src_extent = src_shape[0] * cdef Py_ssize_t dst_extent = dst_shape[0] * cdef Py_ssize_t src_stride = src_strides[0] # <<<<<<<<<<<<<< @@ -19255,7 +19318,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_src_stride = (__pyx_v_src_strides[0]); - /* "View.MemoryView":1150 + /* "View.MemoryView":1152 * cdef Py_ssize_t dst_extent = dst_shape[0] * cdef Py_ssize_t src_stride = src_strides[0] * cdef Py_ssize_t dst_stride = dst_strides[0] # <<<<<<<<<<<<<< @@ -19264,7 +19327,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_dst_stride = (__pyx_v_dst_strides[0]); - /* "View.MemoryView":1152 + /* "View.MemoryView":1154 * cdef Py_ssize_t dst_stride = dst_strides[0] * * if ndim == 1: # <<<<<<<<<<<<<< @@ -19274,7 +19337,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v __pyx_t_1 = ((__pyx_v_ndim == 1) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1153 + /* "View.MemoryView":1155 * * if ndim == 1: * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< @@ -19294,7 +19357,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v goto __pyx_L5_bool_binop_done; } - /* "View.MemoryView":1154 + /* "View.MemoryView":1156 * if ndim == 1: * if (src_stride > 0 and dst_stride > 0 and * src_stride == itemsize == dst_stride): # <<<<<<<<<<<<<< @@ -19309,7 +19372,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v __pyx_t_1 = __pyx_t_3; __pyx_L5_bool_binop_done:; - /* "View.MemoryView":1153 + /* "View.MemoryView":1155 * * if ndim == 1: * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< @@ -19318,7 +19381,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ if (__pyx_t_1) { - /* "View.MemoryView":1155 + /* "View.MemoryView":1157 * if (src_stride > 0 and dst_stride > 0 and * src_stride == 
itemsize == dst_stride): * memcpy(dst_data, src_data, itemsize * dst_extent) # <<<<<<<<<<<<<< @@ -19327,7 +19390,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, (__pyx_v_itemsize * __pyx_v_dst_extent))); - /* "View.MemoryView":1153 + /* "View.MemoryView":1155 * * if ndim == 1: * if (src_stride > 0 and dst_stride > 0 and # <<<<<<<<<<<<<< @@ -19337,7 +19400,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v goto __pyx_L4; } - /* "View.MemoryView":1157 + /* "View.MemoryView":1159 * memcpy(dst_data, src_data, itemsize * dst_extent) * else: * for i in range(dst_extent): # <<<<<<<<<<<<<< @@ -19350,7 +19413,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "View.MemoryView":1158 + /* "View.MemoryView":1160 * else: * for i in range(dst_extent): * memcpy(dst_data, src_data, itemsize) # <<<<<<<<<<<<<< @@ -19359,7 +19422,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ (void)(memcpy(__pyx_v_dst_data, __pyx_v_src_data, __pyx_v_itemsize)); - /* "View.MemoryView":1159 + /* "View.MemoryView":1161 * for i in range(dst_extent): * memcpy(dst_data, src_data, itemsize) * src_data += src_stride # <<<<<<<<<<<<<< @@ -19368,7 +19431,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_src_data = (__pyx_v_src_data + __pyx_v_src_stride); - /* "View.MemoryView":1160 + /* "View.MemoryView":1162 * memcpy(dst_data, src_data, itemsize) * src_data += src_stride * dst_data += dst_stride # <<<<<<<<<<<<<< @@ -19380,7 +19443,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v } __pyx_L4:; - /* "View.MemoryView":1152 + /* "View.MemoryView":1154 * cdef Py_ssize_t dst_stride = dst_strides[0] * * if ndim == 1: # <<<<<<<<<<<<<< @@ -19390,7 
+19453,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v goto __pyx_L3; } - /* "View.MemoryView":1162 + /* "View.MemoryView":1164 * dst_data += dst_stride * else: * for i in range(dst_extent): # <<<<<<<<<<<<<< @@ -19403,7 +19466,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "View.MemoryView":1163 + /* "View.MemoryView":1165 * else: * for i in range(dst_extent): * _copy_strided_to_strided(src_data, src_strides + 1, # <<<<<<<<<<<<<< @@ -19412,7 +19475,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ _copy_strided_to_strided(__pyx_v_src_data, (__pyx_v_src_strides + 1), __pyx_v_dst_data, (__pyx_v_dst_strides + 1), (__pyx_v_src_shape + 1), (__pyx_v_dst_shape + 1), (__pyx_v_ndim - 1), __pyx_v_itemsize); - /* "View.MemoryView":1167 + /* "View.MemoryView":1169 * src_shape + 1, dst_shape + 1, * ndim - 1, itemsize) * src_data += src_stride # <<<<<<<<<<<<<< @@ -19421,7 +19484,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v */ __pyx_v_src_data = (__pyx_v_src_data + __pyx_v_src_stride); - /* "View.MemoryView":1168 + /* "View.MemoryView":1170 * ndim - 1, itemsize) * src_data += src_stride * dst_data += dst_stride # <<<<<<<<<<<<<< @@ -19433,7 +19496,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v } __pyx_L3:; - /* "View.MemoryView":1140 + /* "View.MemoryView":1142 * * @cython.cdivision(True) * cdef void _copy_strided_to_strided(char *src_data, Py_ssize_t *src_strides, # <<<<<<<<<<<<<< @@ -19444,7 +19507,7 @@ static void _copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v /* function exit code */ } -/* "View.MemoryView":1170 +/* "View.MemoryView":1172 * dst_data += dst_stride * * cdef void copy_strided_to_strided(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< @@ -19454,7 +19517,7 @@ static void 
_copy_strided_to_strided(char *__pyx_v_src_data, Py_ssize_t *__pyx_v static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize) { - /* "View.MemoryView":1173 + /* "View.MemoryView":1175 * __Pyx_memviewslice *dst, * int ndim, size_t itemsize) nogil: * _copy_strided_to_strided(src.data, src.strides, dst.data, dst.strides, # <<<<<<<<<<<<<< @@ -19463,7 +19526,7 @@ static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __Pyx_memvi */ _copy_strided_to_strided(__pyx_v_src->data, __pyx_v_src->strides, __pyx_v_dst->data, __pyx_v_dst->strides, __pyx_v_src->shape, __pyx_v_dst->shape, __pyx_v_ndim, __pyx_v_itemsize); - /* "View.MemoryView":1170 + /* "View.MemoryView":1172 * dst_data += dst_stride * * cdef void copy_strided_to_strided(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< @@ -19474,7 +19537,7 @@ static void copy_strided_to_strided(__Pyx_memviewslice *__pyx_v_src, __Pyx_memvi /* function exit code */ } -/* "View.MemoryView":1177 +/* "View.MemoryView":1179 * * @cname('__pyx_memoryview_slice_get_size') * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil: # <<<<<<<<<<<<<< @@ -19491,7 +19554,7 @@ static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr Py_ssize_t *__pyx_t_3; Py_ssize_t *__pyx_t_4; - /* "View.MemoryView":1179 + /* "View.MemoryView":1181 * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil: * "Return the size of the memory occupied by the slice in number of bytes" * cdef Py_ssize_t shape, size = src.memview.view.itemsize # <<<<<<<<<<<<<< @@ -19501,7 +19564,7 @@ static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr __pyx_t_1 = __pyx_v_src->memview->view.itemsize; __pyx_v_size = __pyx_t_1; - /* "View.MemoryView":1181 + /* "View.MemoryView":1183 * cdef Py_ssize_t shape, size = src.memview.view.itemsize * * for shape in src.shape[:ndim]: # <<<<<<<<<<<<<< @@ -19513,7 +19576,7 @@ 
static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr __pyx_t_2 = __pyx_t_4; __pyx_v_shape = (__pyx_t_2[0]); - /* "View.MemoryView":1182 + /* "View.MemoryView":1184 * * for shape in src.shape[:ndim]: * size *= shape # <<<<<<<<<<<<<< @@ -19523,7 +19586,7 @@ static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr __pyx_v_size = (__pyx_v_size * __pyx_v_shape); } - /* "View.MemoryView":1184 + /* "View.MemoryView":1186 * size *= shape * * return size # <<<<<<<<<<<<<< @@ -19533,7 +19596,7 @@ static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr __pyx_r = __pyx_v_size; goto __pyx_L0; - /* "View.MemoryView":1177 + /* "View.MemoryView":1179 * * @cname('__pyx_memoryview_slice_get_size') * cdef Py_ssize_t slice_get_size(__Pyx_memviewslice *src, int ndim) nogil: # <<<<<<<<<<<<<< @@ -19546,7 +19609,7 @@ static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *__pyx_v_sr return __pyx_r; } -/* "View.MemoryView":1187 +/* "View.MemoryView":1189 * * @cname('__pyx_fill_contig_strides_array') * cdef Py_ssize_t fill_contig_strides_array( # <<<<<<<<<<<<<< @@ -19562,7 +19625,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ int __pyx_t_3; int __pyx_t_4; - /* "View.MemoryView":1196 + /* "View.MemoryView":1198 * cdef int idx * * if order == 'F': # <<<<<<<<<<<<<< @@ -19572,7 +19635,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ __pyx_t_1 = ((__pyx_v_order == 'F') != 0); if (__pyx_t_1) { - /* "View.MemoryView":1197 + /* "View.MemoryView":1199 * * if order == 'F': * for idx in range(ndim): # <<<<<<<<<<<<<< @@ -19584,7 +19647,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_idx = __pyx_t_4; - /* "View.MemoryView":1198 + /* "View.MemoryView":1200 * if order == 'F': * for idx in range(ndim): * strides[idx] = stride # 
<<<<<<<<<<<<<< @@ -19593,7 +19656,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ */ (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride; - /* "View.MemoryView":1199 + /* "View.MemoryView":1201 * for idx in range(ndim): * strides[idx] = stride * stride *= shape[idx] # <<<<<<<<<<<<<< @@ -19603,7 +19666,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ __pyx_v_stride = (__pyx_v_stride * (__pyx_v_shape[__pyx_v_idx])); } - /* "View.MemoryView":1196 + /* "View.MemoryView":1198 * cdef int idx * * if order == 'F': # <<<<<<<<<<<<<< @@ -19613,7 +19676,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ goto __pyx_L3; } - /* "View.MemoryView":1201 + /* "View.MemoryView":1203 * stride *= shape[idx] * else: * for idx in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< @@ -19624,7 +19687,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ for (__pyx_t_2 = (__pyx_v_ndim - 1); __pyx_t_2 > -1; __pyx_t_2-=1) { __pyx_v_idx = __pyx_t_2; - /* "View.MemoryView":1202 + /* "View.MemoryView":1204 * else: * for idx in range(ndim - 1, -1, -1): * strides[idx] = stride # <<<<<<<<<<<<<< @@ -19633,7 +19696,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ */ (__pyx_v_strides[__pyx_v_idx]) = __pyx_v_stride; - /* "View.MemoryView":1203 + /* "View.MemoryView":1205 * for idx in range(ndim - 1, -1, -1): * strides[idx] = stride * stride *= shape[idx] # <<<<<<<<<<<<<< @@ -19645,7 +19708,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ } __pyx_L3:; - /* "View.MemoryView":1205 + /* "View.MemoryView":1207 * stride *= shape[idx] * * return stride # <<<<<<<<<<<<<< @@ -19655,7 +19718,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ __pyx_r = __pyx_v_stride; goto __pyx_L0; - /* "View.MemoryView":1187 + /* "View.MemoryView":1189 * * 
@cname('__pyx_fill_contig_strides_array') * cdef Py_ssize_t fill_contig_strides_array( # <<<<<<<<<<<<<< @@ -19668,7 +19731,7 @@ static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *__pyx_v_shape, Py_ return __pyx_r; } -/* "View.MemoryView":1208 +/* "View.MemoryView":1210 * * @cname('__pyx_memoryview_copy_data_to_temp') * cdef void *copy_data_to_temp(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< @@ -19692,7 +19755,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, const char *__pyx_filename = NULL; int __pyx_clineno = 0; - /* "View.MemoryView":1219 + /* "View.MemoryView":1221 * cdef void *result * * cdef size_t itemsize = src.memview.view.itemsize # <<<<<<<<<<<<<< @@ -19702,7 +19765,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_t_1 = __pyx_v_src->memview->view.itemsize; __pyx_v_itemsize = __pyx_t_1; - /* "View.MemoryView":1220 + /* "View.MemoryView":1222 * * cdef size_t itemsize = src.memview.view.itemsize * cdef size_t size = slice_get_size(src, ndim) # <<<<<<<<<<<<<< @@ -19711,7 +19774,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ __pyx_v_size = __pyx_memoryview_slice_get_size(__pyx_v_src, __pyx_v_ndim); - /* "View.MemoryView":1222 + /* "View.MemoryView":1224 * cdef size_t size = slice_get_size(src, ndim) * * result = malloc(size) # <<<<<<<<<<<<<< @@ -19720,7 +19783,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ __pyx_v_result = malloc(__pyx_v_size); - /* "View.MemoryView":1223 + /* "View.MemoryView":1225 * * result = malloc(size) * if not result: # <<<<<<<<<<<<<< @@ -19730,16 +19793,16 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_t_2 = ((!(__pyx_v_result != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1224 + /* "View.MemoryView":1226 * result = malloc(size) * if not result: * _err(MemoryError, NULL) # <<<<<<<<<<<<<< * * */ - __pyx_t_3 = 
__pyx_memoryview_err(__pyx_builtin_MemoryError, NULL); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 1224, __pyx_L1_error) + __pyx_t_3 = __pyx_memoryview_err(__pyx_builtin_MemoryError, NULL); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(1, 1226, __pyx_L1_error) - /* "View.MemoryView":1223 + /* "View.MemoryView":1225 * * result = malloc(size) * if not result: # <<<<<<<<<<<<<< @@ -19748,7 +19811,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ } - /* "View.MemoryView":1227 + /* "View.MemoryView":1229 * * * tmpslice.data = result # <<<<<<<<<<<<<< @@ -19757,7 +19820,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ __pyx_v_tmpslice->data = ((char *)__pyx_v_result); - /* "View.MemoryView":1228 + /* "View.MemoryView":1230 * * tmpslice.data = result * tmpslice.memview = src.memview # <<<<<<<<<<<<<< @@ -19767,7 +19830,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_t_4 = __pyx_v_src->memview; __pyx_v_tmpslice->memview = __pyx_t_4; - /* "View.MemoryView":1229 + /* "View.MemoryView":1231 * tmpslice.data = result * tmpslice.memview = src.memview * for i in range(ndim): # <<<<<<<<<<<<<< @@ -19779,7 +19842,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "View.MemoryView":1230 + /* "View.MemoryView":1232 * tmpslice.memview = src.memview * for i in range(ndim): * tmpslice.shape[i] = src.shape[i] # <<<<<<<<<<<<<< @@ -19788,7 +19851,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ (__pyx_v_tmpslice->shape[__pyx_v_i]) = (__pyx_v_src->shape[__pyx_v_i]); - /* "View.MemoryView":1231 + /* "View.MemoryView":1233 * for i in range(ndim): * tmpslice.shape[i] = src.shape[i] * tmpslice.suboffsets[i] = -1 # <<<<<<<<<<<<<< @@ -19798,7 +19861,7 @@ static void 
*__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, (__pyx_v_tmpslice->suboffsets[__pyx_v_i]) = -1L; } - /* "View.MemoryView":1233 + /* "View.MemoryView":1235 * tmpslice.suboffsets[i] = -1 * * fill_contig_strides_array(&tmpslice.shape[0], &tmpslice.strides[0], itemsize, # <<<<<<<<<<<<<< @@ -19807,7 +19870,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ (void)(__pyx_fill_contig_strides_array((&(__pyx_v_tmpslice->shape[0])), (&(__pyx_v_tmpslice->strides[0])), __pyx_v_itemsize, __pyx_v_ndim, __pyx_v_order)); - /* "View.MemoryView":1237 + /* "View.MemoryView":1239 * * * for i in range(ndim): # <<<<<<<<<<<<<< @@ -19819,7 +19882,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "View.MemoryView":1238 + /* "View.MemoryView":1240 * * for i in range(ndim): * if tmpslice.shape[i] == 1: # <<<<<<<<<<<<<< @@ -19829,7 +19892,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_t_2 = (((__pyx_v_tmpslice->shape[__pyx_v_i]) == 1) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1239 + /* "View.MemoryView":1241 * for i in range(ndim): * if tmpslice.shape[i] == 1: * tmpslice.strides[i] = 0 # <<<<<<<<<<<<<< @@ -19838,7 +19901,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ (__pyx_v_tmpslice->strides[__pyx_v_i]) = 0; - /* "View.MemoryView":1238 + /* "View.MemoryView":1240 * * for i in range(ndim): * if tmpslice.shape[i] == 1: # <<<<<<<<<<<<<< @@ -19848,7 +19911,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, } } - /* "View.MemoryView":1241 + /* "View.MemoryView":1243 * tmpslice.strides[i] = 0 * * if slice_is_contig(src[0], order, ndim): # <<<<<<<<<<<<<< @@ -19858,7 +19921,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_t_2 = 
(__pyx_memviewslice_is_contig((__pyx_v_src[0]), __pyx_v_order, __pyx_v_ndim) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1242 + /* "View.MemoryView":1244 * * if slice_is_contig(src[0], order, ndim): * memcpy(result, src.data, size) # <<<<<<<<<<<<<< @@ -19867,7 +19930,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, */ (void)(memcpy(__pyx_v_result, __pyx_v_src->data, __pyx_v_size)); - /* "View.MemoryView":1241 + /* "View.MemoryView":1243 * tmpslice.strides[i] = 0 * * if slice_is_contig(src[0], order, ndim): # <<<<<<<<<<<<<< @@ -19877,7 +19940,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, goto __pyx_L9; } - /* "View.MemoryView":1244 + /* "View.MemoryView":1246 * memcpy(result, src.data, size) * else: * copy_strided_to_strided(src, tmpslice, ndim, itemsize) # <<<<<<<<<<<<<< @@ -19889,7 +19952,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, } __pyx_L9:; - /* "View.MemoryView":1246 + /* "View.MemoryView":1248 * copy_strided_to_strided(src, tmpslice, ndim, itemsize) * * return result # <<<<<<<<<<<<<< @@ -19899,7 +19962,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, __pyx_r = __pyx_v_result; goto __pyx_L0; - /* "View.MemoryView":1208 + /* "View.MemoryView":1210 * * @cname('__pyx_memoryview_copy_data_to_temp') * cdef void *copy_data_to_temp(__Pyx_memviewslice *src, # <<<<<<<<<<<<<< @@ -19923,7 +19986,7 @@ static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *__pyx_v_src, return __pyx_r; } -/* "View.MemoryView":1251 +/* "View.MemoryView":1253 * * @cname('__pyx_memoryview_err_extents') * cdef int _err_extents(int i, Py_ssize_t extent1, # <<<<<<<<<<<<<< @@ -19946,20 +20009,20 @@ static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_v_extent #endif __Pyx_RefNannySetupContext("_err_extents", 0); - /* "View.MemoryView":1254 + /* "View.MemoryView":1256 * Py_ssize_t extent2) except -1 with gil: 
* raise ValueError("got differing extents in dimension %d (got %d and %d)" % * (i, extent1, extent2)) # <<<<<<<<<<<<<< * * @cname('__pyx_memoryview_err_dim') */ - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_i); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1254, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_i); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_extent1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1254, __pyx_L1_error) + __pyx_t_2 = PyInt_FromSsize_t(__pyx_v_extent1); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_extent2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1254, __pyx_L1_error) + __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_extent2); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1254, __pyx_L1_error) + __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1256, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); @@ -19971,24 +20034,24 @@ static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_v_extent __pyx_t_2 = 0; __pyx_t_3 = 0; - /* "View.MemoryView":1253 + /* "View.MemoryView":1255 * cdef int _err_extents(int i, Py_ssize_t extent1, * Py_ssize_t extent2) except -1 with gil: * raise ValueError("got differing extents in dimension %d (got %d and %d)" % # <<<<<<<<<<<<<< * (i, extent1, extent2)) * */ - __pyx_t_3 = __Pyx_PyString_Format(__pyx_kp_s_got_differing_extents_in_dimensi, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1253, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyString_Format(__pyx_kp_s_got_differing_extents_in_dimensi, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1255, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = 
__Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1253, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1255, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __PYX_ERR(1, 1253, __pyx_L1_error) + __PYX_ERR(1, 1255, __pyx_L1_error) - /* "View.MemoryView":1251 + /* "View.MemoryView":1253 * * @cname('__pyx_memoryview_err_extents') * cdef int _err_extents(int i, Py_ssize_t extent1, # <<<<<<<<<<<<<< @@ -20011,7 +20074,7 @@ static int __pyx_memoryview_err_extents(int __pyx_v_i, Py_ssize_t __pyx_v_extent return __pyx_r; } -/* "View.MemoryView":1257 +/* "View.MemoryView":1259 * * @cname('__pyx_memoryview_err_dim') * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil: # <<<<<<<<<<<<<< @@ -20035,18 +20098,18 @@ static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, char *__pyx_v_msg, __Pyx_RefNannySetupContext("_err_dim", 0); __Pyx_INCREF(__pyx_v_error); - /* "View.MemoryView":1258 + /* "View.MemoryView":1260 * @cname('__pyx_memoryview_err_dim') * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil: * raise error(msg.decode('ascii') % dim) # <<<<<<<<<<<<<< * * @cname('__pyx_memoryview_err') */ - __pyx_t_2 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1258, __pyx_L1_error) + __pyx_t_2 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1258, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_dim); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1260, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PyUnicode_Format(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1258, __pyx_L1_error) + __pyx_t_4 = PyUnicode_Format(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 1260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -20064,14 +20127,14 @@ static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, char *__pyx_v_msg, __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_2, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1258, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 1260, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __PYX_ERR(1, 1258, __pyx_L1_error) + __PYX_ERR(1, 1260, __pyx_L1_error) - /* "View.MemoryView":1257 + /* "View.MemoryView":1259 * * @cname('__pyx_memoryview_err_dim') * cdef int _err_dim(object error, char *msg, int dim) except -1 with gil: # <<<<<<<<<<<<<< @@ -20095,7 +20158,7 @@ static int __pyx_memoryview_err_dim(PyObject *__pyx_v_error, char *__pyx_v_msg, return __pyx_r; } -/* "View.MemoryView":1261 +/* "View.MemoryView":1263 * * @cname('__pyx_memoryview_err') * cdef int _err(object error, char *msg) except -1 with gil: # <<<<<<<<<<<<<< @@ -20120,7 +20183,7 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { __Pyx_RefNannySetupContext("_err", 0); __Pyx_INCREF(__pyx_v_error); - /* "View.MemoryView":1262 + /* "View.MemoryView":1264 * @cname('__pyx_memoryview_err') * cdef int _err(object error, char *msg) except -1 with gil: * if msg != NULL: # <<<<<<<<<<<<<< @@ -20130,14 +20193,14 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { __pyx_t_1 = ((__pyx_v_msg != NULL) != 0); if 
(unlikely(__pyx_t_1)) { - /* "View.MemoryView":1263 + /* "View.MemoryView":1265 * cdef int _err(object error, char *msg) except -1 with gil: * if msg != NULL: * raise error(msg.decode('ascii')) # <<<<<<<<<<<<<< * else: * raise error */ - __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1263, __pyx_L1_error) + __pyx_t_3 = __Pyx_decode_c_string(__pyx_v_msg, 0, strlen(__pyx_v_msg), NULL, NULL, PyUnicode_DecodeASCII); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 1265, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_INCREF(__pyx_v_error); __pyx_t_4 = __pyx_v_error; __pyx_t_5 = NULL; @@ -20153,14 +20216,14 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { __pyx_t_2 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, __pyx_t_3) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1263, __pyx_L1_error) + if (unlikely(!__pyx_t_2)) __PYX_ERR(1, 1265, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_2, 0, 0, 0); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __PYX_ERR(1, 1263, __pyx_L1_error) + __PYX_ERR(1, 1265, __pyx_L1_error) - /* "View.MemoryView":1262 + /* "View.MemoryView":1264 * @cname('__pyx_memoryview_err') * cdef int _err(object error, char *msg) except -1 with gil: * if msg != NULL: # <<<<<<<<<<<<<< @@ -20169,7 +20232,7 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { */ } - /* "View.MemoryView":1265 + /* "View.MemoryView":1267 * raise error(msg.decode('ascii')) * else: * raise error # <<<<<<<<<<<<<< @@ -20178,10 +20241,10 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { */ /*else*/ { __Pyx_Raise(__pyx_v_error, 0, 0, 0); - __PYX_ERR(1, 1265, __pyx_L1_error) + __PYX_ERR(1, 1267, __pyx_L1_error) } - /* 
"View.MemoryView":1261 + /* "View.MemoryView":1263 * * @cname('__pyx_memoryview_err') * cdef int _err(object error, char *msg) except -1 with gil: # <<<<<<<<<<<<<< @@ -20205,7 +20268,7 @@ static int __pyx_memoryview_err(PyObject *__pyx_v_error, char *__pyx_v_msg) { return __pyx_r; } -/* "View.MemoryView":1268 +/* "View.MemoryView":1270 * * @cname('__pyx_memoryview_copy_contents') * cdef int memoryview_copy_contents(__Pyx_memviewslice src, # <<<<<<<<<<<<<< @@ -20235,7 +20298,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ const char *__pyx_filename = NULL; int __pyx_clineno = 0; - /* "View.MemoryView":1276 + /* "View.MemoryView":1278 * Check for overlapping memory and verify the shapes. * """ * cdef void *tmpdata = NULL # <<<<<<<<<<<<<< @@ -20244,7 +20307,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_tmpdata = NULL; - /* "View.MemoryView":1277 + /* "View.MemoryView":1279 * """ * cdef void *tmpdata = NULL * cdef size_t itemsize = src.memview.view.itemsize # <<<<<<<<<<<<<< @@ -20254,7 +20317,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_1 = __pyx_v_src.memview->view.itemsize; __pyx_v_itemsize = __pyx_t_1; - /* "View.MemoryView":1279 + /* "View.MemoryView":1281 * cdef size_t itemsize = src.memview.view.itemsize * cdef int i * cdef char order = get_best_order(&src, src_ndim) # <<<<<<<<<<<<<< @@ -20263,7 +20326,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_src), __pyx_v_src_ndim); - /* "View.MemoryView":1280 + /* "View.MemoryView":1282 * cdef int i * cdef char order = get_best_order(&src, src_ndim) * cdef bint broadcasting = False # <<<<<<<<<<<<<< @@ -20272,7 +20335,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_broadcasting = 0; - /* "View.MemoryView":1281 + /* "View.MemoryView":1283 * cdef 
char order = get_best_order(&src, src_ndim) * cdef bint broadcasting = False * cdef bint direct_copy = False # <<<<<<<<<<<<<< @@ -20281,7 +20344,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_direct_copy = 0; - /* "View.MemoryView":1284 + /* "View.MemoryView":1286 * cdef __Pyx_memviewslice tmp * * if src_ndim < dst_ndim: # <<<<<<<<<<<<<< @@ -20291,7 +20354,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = ((__pyx_v_src_ndim < __pyx_v_dst_ndim) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1285 + /* "View.MemoryView":1287 * * if src_ndim < dst_ndim: * broadcast_leading(&src, src_ndim, dst_ndim) # <<<<<<<<<<<<<< @@ -20300,7 +20363,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_memoryview_broadcast_leading((&__pyx_v_src), __pyx_v_src_ndim, __pyx_v_dst_ndim); - /* "View.MemoryView":1284 + /* "View.MemoryView":1286 * cdef __Pyx_memviewslice tmp * * if src_ndim < dst_ndim: # <<<<<<<<<<<<<< @@ -20310,7 +20373,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ goto __pyx_L3; } - /* "View.MemoryView":1286 + /* "View.MemoryView":1288 * if src_ndim < dst_ndim: * broadcast_leading(&src, src_ndim, dst_ndim) * elif dst_ndim < src_ndim: # <<<<<<<<<<<<<< @@ -20320,7 +20383,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = ((__pyx_v_dst_ndim < __pyx_v_src_ndim) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1287 + /* "View.MemoryView":1289 * broadcast_leading(&src, src_ndim, dst_ndim) * elif dst_ndim < src_ndim: * broadcast_leading(&dst, dst_ndim, src_ndim) # <<<<<<<<<<<<<< @@ -20329,7 +20392,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_memoryview_broadcast_leading((&__pyx_v_dst), __pyx_v_dst_ndim, __pyx_v_src_ndim); - /* "View.MemoryView":1286 + /* "View.MemoryView":1288 * if src_ndim < dst_ndim: * 
broadcast_leading(&src, src_ndim, dst_ndim) * elif dst_ndim < src_ndim: # <<<<<<<<<<<<<< @@ -20339,7 +20402,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ } __pyx_L3:; - /* "View.MemoryView":1289 + /* "View.MemoryView":1291 * broadcast_leading(&dst, dst_ndim, src_ndim) * * cdef int ndim = max(src_ndim, dst_ndim) # <<<<<<<<<<<<<< @@ -20355,7 +20418,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ } __pyx_v_ndim = __pyx_t_5; - /* "View.MemoryView":1291 + /* "View.MemoryView":1293 * cdef int ndim = max(src_ndim, dst_ndim) * * for i in range(ndim): # <<<<<<<<<<<<<< @@ -20367,7 +20430,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_i = __pyx_t_4; - /* "View.MemoryView":1292 + /* "View.MemoryView":1294 * * for i in range(ndim): * if src.shape[i] != dst.shape[i]: # <<<<<<<<<<<<<< @@ -20377,7 +20440,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (((__pyx_v_src.shape[__pyx_v_i]) != (__pyx_v_dst.shape[__pyx_v_i])) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1293 + /* "View.MemoryView":1295 * for i in range(ndim): * if src.shape[i] != dst.shape[i]: * if src.shape[i] == 1: # <<<<<<<<<<<<<< @@ -20387,7 +20450,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (((__pyx_v_src.shape[__pyx_v_i]) == 1) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1294 + /* "View.MemoryView":1296 * if src.shape[i] != dst.shape[i]: * if src.shape[i] == 1: * broadcasting = True # <<<<<<<<<<<<<< @@ -20396,7 +20459,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_broadcasting = 1; - /* "View.MemoryView":1295 + /* "View.MemoryView":1297 * if src.shape[i] == 1: * broadcasting = True * src.strides[i] = 0 # <<<<<<<<<<<<<< @@ -20405,7 +20468,7 @@ static int 
__pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ (__pyx_v_src.strides[__pyx_v_i]) = 0; - /* "View.MemoryView":1293 + /* "View.MemoryView":1295 * for i in range(ndim): * if src.shape[i] != dst.shape[i]: * if src.shape[i] == 1: # <<<<<<<<<<<<<< @@ -20415,7 +20478,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ goto __pyx_L7; } - /* "View.MemoryView":1297 + /* "View.MemoryView":1299 * src.strides[i] = 0 * else: * _err_extents(i, dst.shape[i], src.shape[i]) # <<<<<<<<<<<<<< @@ -20423,11 +20486,11 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ * if src.suboffsets[i] >= 0: */ /*else*/ { - __pyx_t_6 = __pyx_memoryview_err_extents(__pyx_v_i, (__pyx_v_dst.shape[__pyx_v_i]), (__pyx_v_src.shape[__pyx_v_i])); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1297, __pyx_L1_error) + __pyx_t_6 = __pyx_memoryview_err_extents(__pyx_v_i, (__pyx_v_dst.shape[__pyx_v_i]), (__pyx_v_src.shape[__pyx_v_i])); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1299, __pyx_L1_error) } __pyx_L7:; - /* "View.MemoryView":1292 + /* "View.MemoryView":1294 * * for i in range(ndim): * if src.shape[i] != dst.shape[i]: # <<<<<<<<<<<<<< @@ -20436,7 +20499,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1299 + /* "View.MemoryView":1301 * _err_extents(i, dst.shape[i], src.shape[i]) * * if src.suboffsets[i] >= 0: # <<<<<<<<<<<<<< @@ -20446,16 +20509,16 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (((__pyx_v_src.suboffsets[__pyx_v_i]) >= 0) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1300 + /* "View.MemoryView":1302 * * if src.suboffsets[i] >= 0: * _err_dim(ValueError, "Dimension %d is not direct", i) # <<<<<<<<<<<<<< * * if slices_overlap(&src, &dst, ndim, itemsize): */ - __pyx_t_6 = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Dimension %d is not direct"), __pyx_v_i); if 
(unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1300, __pyx_L1_error) + __pyx_t_6 = __pyx_memoryview_err_dim(__pyx_builtin_ValueError, ((char *)"Dimension %d is not direct"), __pyx_v_i); if (unlikely(__pyx_t_6 == ((int)-1))) __PYX_ERR(1, 1302, __pyx_L1_error) - /* "View.MemoryView":1299 + /* "View.MemoryView":1301 * _err_extents(i, dst.shape[i], src.shape[i]) * * if src.suboffsets[i] >= 0: # <<<<<<<<<<<<<< @@ -20465,7 +20528,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ } } - /* "View.MemoryView":1302 + /* "View.MemoryView":1304 * _err_dim(ValueError, "Dimension %d is not direct", i) * * if slices_overlap(&src, &dst, ndim, itemsize): # <<<<<<<<<<<<<< @@ -20475,7 +20538,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (__pyx_slices_overlap((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1304 + /* "View.MemoryView":1306 * if slices_overlap(&src, &dst, ndim, itemsize): * * if not slice_is_contig(src, order, ndim): # <<<<<<<<<<<<<< @@ -20485,7 +20548,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = ((!(__pyx_memviewslice_is_contig(__pyx_v_src, __pyx_v_order, __pyx_v_ndim) != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1305 + /* "View.MemoryView":1307 * * if not slice_is_contig(src, order, ndim): * order = get_best_order(&dst, ndim) # <<<<<<<<<<<<<< @@ -20494,7 +20557,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_order = __pyx_get_best_slice_order((&__pyx_v_dst), __pyx_v_ndim); - /* "View.MemoryView":1304 + /* "View.MemoryView":1306 * if slices_overlap(&src, &dst, ndim, itemsize): * * if not slice_is_contig(src, order, ndim): # <<<<<<<<<<<<<< @@ -20503,17 +20566,17 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1307 + /* "View.MemoryView":1309 * order 
= get_best_order(&dst, ndim) * * tmpdata = copy_data_to_temp(&src, &tmp, order, ndim) # <<<<<<<<<<<<<< * src = tmp * */ - __pyx_t_7 = __pyx_memoryview_copy_data_to_temp((&__pyx_v_src), (&__pyx_v_tmp), __pyx_v_order, __pyx_v_ndim); if (unlikely(__pyx_t_7 == ((void *)NULL))) __PYX_ERR(1, 1307, __pyx_L1_error) + __pyx_t_7 = __pyx_memoryview_copy_data_to_temp((&__pyx_v_src), (&__pyx_v_tmp), __pyx_v_order, __pyx_v_ndim); if (unlikely(__pyx_t_7 == ((void *)NULL))) __PYX_ERR(1, 1309, __pyx_L1_error) __pyx_v_tmpdata = __pyx_t_7; - /* "View.MemoryView":1308 + /* "View.MemoryView":1310 * * tmpdata = copy_data_to_temp(&src, &tmp, order, ndim) * src = tmp # <<<<<<<<<<<<<< @@ -20522,7 +20585,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_src = __pyx_v_tmp; - /* "View.MemoryView":1302 + /* "View.MemoryView":1304 * _err_dim(ValueError, "Dimension %d is not direct", i) * * if slices_overlap(&src, &dst, ndim, itemsize): # <<<<<<<<<<<<<< @@ -20531,7 +20594,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1310 + /* "View.MemoryView":1312 * src = tmp * * if not broadcasting: # <<<<<<<<<<<<<< @@ -20541,7 +20604,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = ((!(__pyx_v_broadcasting != 0)) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1313 + /* "View.MemoryView":1315 * * * if slice_is_contig(src, 'C', ndim): # <<<<<<<<<<<<<< @@ -20551,7 +20614,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (__pyx_memviewslice_is_contig(__pyx_v_src, 'C', __pyx_v_ndim) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1314 + /* "View.MemoryView":1316 * * if slice_is_contig(src, 'C', ndim): * direct_copy = slice_is_contig(dst, 'C', ndim) # <<<<<<<<<<<<<< @@ -20560,7 +20623,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_direct_copy = 
__pyx_memviewslice_is_contig(__pyx_v_dst, 'C', __pyx_v_ndim); - /* "View.MemoryView":1313 + /* "View.MemoryView":1315 * * * if slice_is_contig(src, 'C', ndim): # <<<<<<<<<<<<<< @@ -20570,7 +20633,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ goto __pyx_L12; } - /* "View.MemoryView":1315 + /* "View.MemoryView":1317 * if slice_is_contig(src, 'C', ndim): * direct_copy = slice_is_contig(dst, 'C', ndim) * elif slice_is_contig(src, 'F', ndim): # <<<<<<<<<<<<<< @@ -20580,7 +20643,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (__pyx_memviewslice_is_contig(__pyx_v_src, 'F', __pyx_v_ndim) != 0); if (__pyx_t_2) { - /* "View.MemoryView":1316 + /* "View.MemoryView":1318 * direct_copy = slice_is_contig(dst, 'C', ndim) * elif slice_is_contig(src, 'F', ndim): * direct_copy = slice_is_contig(dst, 'F', ndim) # <<<<<<<<<<<<<< @@ -20589,7 +20652,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_v_direct_copy = __pyx_memviewslice_is_contig(__pyx_v_dst, 'F', __pyx_v_ndim); - /* "View.MemoryView":1315 + /* "View.MemoryView":1317 * if slice_is_contig(src, 'C', ndim): * direct_copy = slice_is_contig(dst, 'C', ndim) * elif slice_is_contig(src, 'F', ndim): # <<<<<<<<<<<<<< @@ -20599,7 +20662,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ } __pyx_L12:; - /* "View.MemoryView":1318 + /* "View.MemoryView":1320 * direct_copy = slice_is_contig(dst, 'F', ndim) * * if direct_copy: # <<<<<<<<<<<<<< @@ -20609,7 +20672,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_2 = (__pyx_v_direct_copy != 0); if (__pyx_t_2) { - /* "View.MemoryView":1320 + /* "View.MemoryView":1322 * if direct_copy: * * refcount_copying(&dst, dtype_is_object, ndim, False) # <<<<<<<<<<<<<< @@ -20618,7 +20681,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ 
__pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - /* "View.MemoryView":1321 + /* "View.MemoryView":1323 * * refcount_copying(&dst, dtype_is_object, ndim, False) * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) # <<<<<<<<<<<<<< @@ -20627,7 +20690,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ (void)(memcpy(__pyx_v_dst.data, __pyx_v_src.data, __pyx_memoryview_slice_get_size((&__pyx_v_src), __pyx_v_ndim))); - /* "View.MemoryView":1322 + /* "View.MemoryView":1324 * refcount_copying(&dst, dtype_is_object, ndim, False) * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) * refcount_copying(&dst, dtype_is_object, ndim, True) # <<<<<<<<<<<<<< @@ -20636,7 +20699,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - /* "View.MemoryView":1323 + /* "View.MemoryView":1325 * memcpy(dst.data, src.data, slice_get_size(&src, ndim)) * refcount_copying(&dst, dtype_is_object, ndim, True) * free(tmpdata) # <<<<<<<<<<<<<< @@ -20645,7 +20708,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ free(__pyx_v_tmpdata); - /* "View.MemoryView":1324 + /* "View.MemoryView":1326 * refcount_copying(&dst, dtype_is_object, ndim, True) * free(tmpdata) * return 0 # <<<<<<<<<<<<<< @@ -20655,7 +20718,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_r = 0; goto __pyx_L0; - /* "View.MemoryView":1318 + /* "View.MemoryView":1320 * direct_copy = slice_is_contig(dst, 'F', ndim) * * if direct_copy: # <<<<<<<<<<<<<< @@ -20664,7 +20727,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1310 + /* "View.MemoryView":1312 * src = tmp * * if not broadcasting: # <<<<<<<<<<<<<< @@ -20673,7 +20736,7 @@ static int 
__pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1326 + /* "View.MemoryView":1328 * return 0 * * if order == 'F' == get_best_order(&dst, ndim): # <<<<<<<<<<<<<< @@ -20687,25 +20750,25 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_t_8 = (__pyx_t_2 != 0); if (__pyx_t_8) { - /* "View.MemoryView":1329 + /* "View.MemoryView":1331 * * * transpose_memslice(&src) # <<<<<<<<<<<<<< * transpose_memslice(&dst) * */ - __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_src)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(1, 1329, __pyx_L1_error) + __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_src)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(1, 1331, __pyx_L1_error) - /* "View.MemoryView":1330 + /* "View.MemoryView":1332 * * transpose_memslice(&src) * transpose_memslice(&dst) # <<<<<<<<<<<<<< * * refcount_copying(&dst, dtype_is_object, ndim, False) */ - __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_dst)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(1, 1330, __pyx_L1_error) + __pyx_t_5 = __pyx_memslice_transpose((&__pyx_v_dst)); if (unlikely(__pyx_t_5 == ((int)0))) __PYX_ERR(1, 1332, __pyx_L1_error) - /* "View.MemoryView":1326 + /* "View.MemoryView":1328 * return 0 * * if order == 'F' == get_best_order(&dst, ndim): # <<<<<<<<<<<<<< @@ -20714,7 +20777,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ } - /* "View.MemoryView":1332 + /* "View.MemoryView":1334 * transpose_memslice(&dst) * * refcount_copying(&dst, dtype_is_object, ndim, False) # <<<<<<<<<<<<<< @@ -20723,7 +20786,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - /* "View.MemoryView":1333 + /* "View.MemoryView":1335 * * refcount_copying(&dst, dtype_is_object, ndim, False) * copy_strided_to_strided(&src, &dst, ndim, itemsize) # <<<<<<<<<<<<<< @@ -20732,7 
+20795,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ copy_strided_to_strided((&__pyx_v_src), (&__pyx_v_dst), __pyx_v_ndim, __pyx_v_itemsize); - /* "View.MemoryView":1334 + /* "View.MemoryView":1336 * refcount_copying(&dst, dtype_is_object, ndim, False) * copy_strided_to_strided(&src, &dst, ndim, itemsize) * refcount_copying(&dst, dtype_is_object, ndim, True) # <<<<<<<<<<<<<< @@ -20741,7 +20804,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ __pyx_memoryview_refcount_copying((&__pyx_v_dst), __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - /* "View.MemoryView":1336 + /* "View.MemoryView":1338 * refcount_copying(&dst, dtype_is_object, ndim, True) * * free(tmpdata) # <<<<<<<<<<<<<< @@ -20750,7 +20813,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ */ free(__pyx_v_tmpdata); - /* "View.MemoryView":1337 + /* "View.MemoryView":1339 * * free(tmpdata) * return 0 # <<<<<<<<<<<<<< @@ -20760,7 +20823,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ __pyx_r = 0; goto __pyx_L0; - /* "View.MemoryView":1268 + /* "View.MemoryView":1270 * * @cname('__pyx_memoryview_copy_contents') * cdef int memoryview_copy_contents(__Pyx_memviewslice src, # <<<<<<<<<<<<<< @@ -20784,7 +20847,7 @@ static int __pyx_memoryview_copy_contents(__Pyx_memviewslice __pyx_v_src, __Pyx_ return __pyx_r; } -/* "View.MemoryView":1340 +/* "View.MemoryView":1342 * * @cname('__pyx_memoryview_broadcast_leading') * cdef void broadcast_leading(__Pyx_memviewslice *mslice, # <<<<<<<<<<<<<< @@ -20799,7 +20862,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic int __pyx_t_2; int __pyx_t_3; - /* "View.MemoryView":1344 + /* "View.MemoryView":1346 * int ndim_other) nogil: * cdef int i * cdef int offset = ndim_other - ndim # <<<<<<<<<<<<<< @@ -20808,7 +20871,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice 
*__pyx_v_mslic */ __pyx_v_offset = (__pyx_v_ndim_other - __pyx_v_ndim); - /* "View.MemoryView":1346 + /* "View.MemoryView":1348 * cdef int offset = ndim_other - ndim * * for i in range(ndim - 1, -1, -1): # <<<<<<<<<<<<<< @@ -20818,7 +20881,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic for (__pyx_t_1 = (__pyx_v_ndim - 1); __pyx_t_1 > -1; __pyx_t_1-=1) { __pyx_v_i = __pyx_t_1; - /* "View.MemoryView":1347 + /* "View.MemoryView":1349 * * for i in range(ndim - 1, -1, -1): * mslice.shape[i + offset] = mslice.shape[i] # <<<<<<<<<<<<<< @@ -20827,7 +20890,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic */ (__pyx_v_mslice->shape[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->shape[__pyx_v_i]); - /* "View.MemoryView":1348 + /* "View.MemoryView":1350 * for i in range(ndim - 1, -1, -1): * mslice.shape[i + offset] = mslice.shape[i] * mslice.strides[i + offset] = mslice.strides[i] # <<<<<<<<<<<<<< @@ -20836,7 +20899,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic */ (__pyx_v_mslice->strides[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->strides[__pyx_v_i]); - /* "View.MemoryView":1349 + /* "View.MemoryView":1351 * mslice.shape[i + offset] = mslice.shape[i] * mslice.strides[i + offset] = mslice.strides[i] * mslice.suboffsets[i + offset] = mslice.suboffsets[i] # <<<<<<<<<<<<<< @@ -20846,7 +20909,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic (__pyx_v_mslice->suboffsets[(__pyx_v_i + __pyx_v_offset)]) = (__pyx_v_mslice->suboffsets[__pyx_v_i]); } - /* "View.MemoryView":1351 + /* "View.MemoryView":1353 * mslice.suboffsets[i + offset] = mslice.suboffsets[i] * * for i in range(offset): # <<<<<<<<<<<<<< @@ -20858,7 +20921,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "View.MemoryView":1352 + /* 
"View.MemoryView":1354 * * for i in range(offset): * mslice.shape[i] = 1 # <<<<<<<<<<<<<< @@ -20867,7 +20930,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic */ (__pyx_v_mslice->shape[__pyx_v_i]) = 1; - /* "View.MemoryView":1353 + /* "View.MemoryView":1355 * for i in range(offset): * mslice.shape[i] = 1 * mslice.strides[i] = mslice.strides[0] # <<<<<<<<<<<<<< @@ -20876,7 +20939,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic */ (__pyx_v_mslice->strides[__pyx_v_i]) = (__pyx_v_mslice->strides[0]); - /* "View.MemoryView":1354 + /* "View.MemoryView":1356 * mslice.shape[i] = 1 * mslice.strides[i] = mslice.strides[0] * mslice.suboffsets[i] = -1 # <<<<<<<<<<<<<< @@ -20886,7 +20949,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic (__pyx_v_mslice->suboffsets[__pyx_v_i]) = -1L; } - /* "View.MemoryView":1340 + /* "View.MemoryView":1342 * * @cname('__pyx_memoryview_broadcast_leading') * cdef void broadcast_leading(__Pyx_memviewslice *mslice, # <<<<<<<<<<<<<< @@ -20897,7 +20960,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic /* function exit code */ } -/* "View.MemoryView":1362 +/* "View.MemoryView":1364 * * @cname('__pyx_memoryview_refcount_copying') * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, # <<<<<<<<<<<<<< @@ -20908,7 +20971,7 @@ static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *__pyx_v_mslic static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_dtype_is_object, int __pyx_v_ndim, int __pyx_v_inc) { int __pyx_t_1; - /* "View.MemoryView":1366 + /* "View.MemoryView":1368 * * * if dtype_is_object: # <<<<<<<<<<<<<< @@ -20918,7 +20981,7 @@ static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, i __pyx_t_1 = (__pyx_v_dtype_is_object != 0); if (__pyx_t_1) { - /* "View.MemoryView":1367 + /* "View.MemoryView":1369 * * if 
dtype_is_object: * refcount_objects_in_slice_with_gil(dst.data, dst.shape, # <<<<<<<<<<<<<< @@ -20927,7 +20990,7 @@ static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, i */ __pyx_memoryview_refcount_objects_in_slice_with_gil(__pyx_v_dst->data, __pyx_v_dst->shape, __pyx_v_dst->strides, __pyx_v_ndim, __pyx_v_inc); - /* "View.MemoryView":1366 + /* "View.MemoryView":1368 * * * if dtype_is_object: # <<<<<<<<<<<<<< @@ -20936,7 +20999,7 @@ static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, i */ } - /* "View.MemoryView":1362 + /* "View.MemoryView":1364 * * @cname('__pyx_memoryview_refcount_copying') * cdef void refcount_copying(__Pyx_memviewslice *dst, bint dtype_is_object, # <<<<<<<<<<<<<< @@ -20947,7 +21010,7 @@ static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *__pyx_v_dst, i /* function exit code */ } -/* "View.MemoryView":1371 +/* "View.MemoryView":1373 * * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil') * cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -20962,7 +21025,7 @@ static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *__pyx_v_da #endif __Pyx_RefNannySetupContext("refcount_objects_in_slice_with_gil", 0); - /* "View.MemoryView":1374 + /* "View.MemoryView":1376 * Py_ssize_t *strides, int ndim, * bint inc) with gil: * refcount_objects_in_slice(data, shape, strides, ndim, inc) # <<<<<<<<<<<<<< @@ -20971,7 +21034,7 @@ static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *__pyx_v_da */ __pyx_memoryview_refcount_objects_in_slice(__pyx_v_data, __pyx_v_shape, __pyx_v_strides, __pyx_v_ndim, __pyx_v_inc); - /* "View.MemoryView":1371 + /* "View.MemoryView":1373 * * @cname('__pyx_memoryview_refcount_objects_in_slice_with_gil') * cdef void refcount_objects_in_slice_with_gil(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -20986,7 +21049,7 @@ static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char 
*__pyx_v_da #endif } -/* "View.MemoryView":1377 +/* "View.MemoryView":1379 * * @cname('__pyx_memoryview_refcount_objects_in_slice') * cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -21003,7 +21066,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss int __pyx_t_4; __Pyx_RefNannySetupContext("refcount_objects_in_slice", 0); - /* "View.MemoryView":1381 + /* "View.MemoryView":1383 * cdef Py_ssize_t i * * for i in range(shape[0]): # <<<<<<<<<<<<<< @@ -21015,7 +21078,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "View.MemoryView":1382 + /* "View.MemoryView":1384 * * for i in range(shape[0]): * if ndim == 1: # <<<<<<<<<<<<<< @@ -21025,7 +21088,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss __pyx_t_4 = ((__pyx_v_ndim == 1) != 0); if (__pyx_t_4) { - /* "View.MemoryView":1383 + /* "View.MemoryView":1385 * for i in range(shape[0]): * if ndim == 1: * if inc: # <<<<<<<<<<<<<< @@ -21035,7 +21098,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss __pyx_t_4 = (__pyx_v_inc != 0); if (__pyx_t_4) { - /* "View.MemoryView":1384 + /* "View.MemoryView":1386 * if ndim == 1: * if inc: * Py_INCREF(( data)[0]) # <<<<<<<<<<<<<< @@ -21044,7 +21107,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss */ Py_INCREF((((PyObject **)__pyx_v_data)[0])); - /* "View.MemoryView":1383 + /* "View.MemoryView":1385 * for i in range(shape[0]): * if ndim == 1: * if inc: # <<<<<<<<<<<<<< @@ -21054,7 +21117,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss goto __pyx_L6; } - /* "View.MemoryView":1386 + /* "View.MemoryView":1388 * Py_INCREF(( data)[0]) * else: * Py_DECREF(( data)[0]) # <<<<<<<<<<<<<< @@ -21066,7 +21129,7 @@ static void 
__pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss } __pyx_L6:; - /* "View.MemoryView":1382 + /* "View.MemoryView":1384 * * for i in range(shape[0]): * if ndim == 1: # <<<<<<<<<<<<<< @@ -21076,7 +21139,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss goto __pyx_L5; } - /* "View.MemoryView":1388 + /* "View.MemoryView":1390 * Py_DECREF(( data)[0]) * else: * refcount_objects_in_slice(data, shape + 1, strides + 1, # <<<<<<<<<<<<<< @@ -21085,7 +21148,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss */ /*else*/ { - /* "View.MemoryView":1389 + /* "View.MemoryView":1391 * else: * refcount_objects_in_slice(data, shape + 1, strides + 1, * ndim - 1, inc) # <<<<<<<<<<<<<< @@ -21096,7 +21159,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss } __pyx_L5:; - /* "View.MemoryView":1391 + /* "View.MemoryView":1393 * ndim - 1, inc) * * data += strides[0] # <<<<<<<<<<<<<< @@ -21106,7 +21169,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss __pyx_v_data = (__pyx_v_data + (__pyx_v_strides[0])); } - /* "View.MemoryView":1377 + /* "View.MemoryView":1379 * * @cname('__pyx_memoryview_refcount_objects_in_slice') * cdef void refcount_objects_in_slice(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -21118,7 +21181,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss __Pyx_RefNannyFinishContext(); } -/* "View.MemoryView":1397 +/* "View.MemoryView":1399 * * @cname('__pyx_memoryview_slice_assign_scalar') * cdef void slice_assign_scalar(__Pyx_memviewslice *dst, int ndim, # <<<<<<<<<<<<<< @@ -21128,7 +21191,7 @@ static void __pyx_memoryview_refcount_objects_in_slice(char *__pyx_v_data, Py_ss static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst, int __pyx_v_ndim, size_t __pyx_v_itemsize, void *__pyx_v_item, int __pyx_v_dtype_is_object) { - /* "View.MemoryView":1400 + /* 
"View.MemoryView":1402 * size_t itemsize, void *item, * bint dtype_is_object) nogil: * refcount_copying(dst, dtype_is_object, ndim, False) # <<<<<<<<<<<<<< @@ -21137,7 +21200,7 @@ static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst */ __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, 0); - /* "View.MemoryView":1401 + /* "View.MemoryView":1403 * bint dtype_is_object) nogil: * refcount_copying(dst, dtype_is_object, ndim, False) * _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, # <<<<<<<<<<<<<< @@ -21146,7 +21209,7 @@ static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst */ __pyx_memoryview__slice_assign_scalar(__pyx_v_dst->data, __pyx_v_dst->shape, __pyx_v_dst->strides, __pyx_v_ndim, __pyx_v_itemsize, __pyx_v_item); - /* "View.MemoryView":1403 + /* "View.MemoryView":1405 * _slice_assign_scalar(dst.data, dst.shape, dst.strides, ndim, * itemsize, item) * refcount_copying(dst, dtype_is_object, ndim, True) # <<<<<<<<<<<<<< @@ -21155,7 +21218,7 @@ static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst */ __pyx_memoryview_refcount_copying(__pyx_v_dst, __pyx_v_dtype_is_object, __pyx_v_ndim, 1); - /* "View.MemoryView":1397 + /* "View.MemoryView":1399 * * @cname('__pyx_memoryview_slice_assign_scalar') * cdef void slice_assign_scalar(__Pyx_memviewslice *dst, int ndim, # <<<<<<<<<<<<<< @@ -21166,7 +21229,7 @@ static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *__pyx_v_dst /* function exit code */ } -/* "View.MemoryView":1407 +/* "View.MemoryView":1409 * * @cname('__pyx_memoryview__slice_assign_scalar') * cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -21183,7 +21246,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t Py_ssize_t __pyx_t_3; Py_ssize_t __pyx_t_4; - /* "View.MemoryView":1411 + /* "View.MemoryView":1413 * size_t itemsize, void *item) nogil: * cdef 
Py_ssize_t i * cdef Py_ssize_t stride = strides[0] # <<<<<<<<<<<<<< @@ -21192,7 +21255,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t */ __pyx_v_stride = (__pyx_v_strides[0]); - /* "View.MemoryView":1412 + /* "View.MemoryView":1414 * cdef Py_ssize_t i * cdef Py_ssize_t stride = strides[0] * cdef Py_ssize_t extent = shape[0] # <<<<<<<<<<<<<< @@ -21201,7 +21264,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t */ __pyx_v_extent = (__pyx_v_shape[0]); - /* "View.MemoryView":1414 + /* "View.MemoryView":1416 * cdef Py_ssize_t extent = shape[0] * * if ndim == 1: # <<<<<<<<<<<<<< @@ -21211,7 +21274,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t __pyx_t_1 = ((__pyx_v_ndim == 1) != 0); if (__pyx_t_1) { - /* "View.MemoryView":1415 + /* "View.MemoryView":1417 * * if ndim == 1: * for i in range(extent): # <<<<<<<<<<<<<< @@ -21223,7 +21286,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_i = __pyx_t_4; - /* "View.MemoryView":1416 + /* "View.MemoryView":1418 * if ndim == 1: * for i in range(extent): * memcpy(data, item, itemsize) # <<<<<<<<<<<<<< @@ -21232,7 +21295,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t */ (void)(memcpy(__pyx_v_data, __pyx_v_item, __pyx_v_itemsize)); - /* "View.MemoryView":1417 + /* "View.MemoryView":1419 * for i in range(extent): * memcpy(data, item, itemsize) * data += stride # <<<<<<<<<<<<<< @@ -21242,7 +21305,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t __pyx_v_data = (__pyx_v_data + __pyx_v_stride); } - /* "View.MemoryView":1414 + /* "View.MemoryView":1416 * cdef Py_ssize_t extent = shape[0] * * if ndim == 1: # <<<<<<<<<<<<<< @@ -21252,7 +21315,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t goto __pyx_L3; } - /* 
"View.MemoryView":1419 + /* "View.MemoryView":1421 * data += stride * else: * for i in range(extent): # <<<<<<<<<<<<<< @@ -21265,7 +21328,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { __pyx_v_i = __pyx_t_4; - /* "View.MemoryView":1420 + /* "View.MemoryView":1422 * else: * for i in range(extent): * _slice_assign_scalar(data, shape + 1, strides + 1, # <<<<<<<<<<<<<< @@ -21274,7 +21337,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t */ __pyx_memoryview__slice_assign_scalar(__pyx_v_data, (__pyx_v_shape + 1), (__pyx_v_strides + 1), (__pyx_v_ndim - 1), __pyx_v_itemsize, __pyx_v_item); - /* "View.MemoryView":1422 + /* "View.MemoryView":1424 * _slice_assign_scalar(data, shape + 1, strides + 1, * ndim - 1, itemsize, item) * data += stride # <<<<<<<<<<<<<< @@ -21286,7 +21349,7 @@ static void __pyx_memoryview__slice_assign_scalar(char *__pyx_v_data, Py_ssize_t } __pyx_L3:; - /* "View.MemoryView":1407 + /* "View.MemoryView":1409 * * @cname('__pyx_memoryview__slice_assign_scalar') * cdef void _slice_assign_scalar(char *data, Py_ssize_t *shape, # <<<<<<<<<<<<<< @@ -23798,9 +23861,9 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 194, __pyx_L1_error) __pyx_builtin_KeyError = __Pyx_GetBuiltinName(__pyx_n_s_KeyError); if (!__pyx_builtin_KeyError) __PYX_ERR(0, 308, __pyx_L1_error) __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(0, 435, __pyx_L1_error) - __pyx_builtin_Ellipsis = __Pyx_GetBuiltinName(__pyx_n_s_Ellipsis); if (!__pyx_builtin_Ellipsis) __PYX_ERR(1, 404, __pyx_L1_error) - __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(1, 613, __pyx_L1_error) - __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); 
if (!__pyx_builtin_IndexError) __PYX_ERR(1, 832, __pyx_L1_error) + __pyx_builtin_Ellipsis = __Pyx_GetBuiltinName(__pyx_n_s_Ellipsis); if (!__pyx_builtin_Ellipsis) __PYX_ERR(1, 406, __pyx_L1_error) + __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_n_s_id); if (!__pyx_builtin_id) __PYX_ERR(1, 615, __pyx_L1_error) + __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_n_s_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 834, __pyx_L1_error) return 0; __pyx_L1_error:; return -1; @@ -23950,58 +24013,58 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - /* "View.MemoryView":133 + /* "View.MemoryView":134 * * if not self.ndim: * raise ValueError("Empty shape tuple for cython.array") # <<<<<<<<<<<<<< * * if itemsize <= 0: */ - __pyx_tuple__16 = PyTuple_Pack(1, __pyx_kp_s_Empty_shape_tuple_for_cython_arr); if (unlikely(!__pyx_tuple__16)) __PYX_ERR(1, 133, __pyx_L1_error) + __pyx_tuple__16 = PyTuple_Pack(1, __pyx_kp_s_Empty_shape_tuple_for_cython_arr); if (unlikely(!__pyx_tuple__16)) __PYX_ERR(1, 134, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - /* "View.MemoryView":136 + /* "View.MemoryView":137 * * if itemsize <= 0: * raise ValueError("itemsize <= 0 for cython.array") # <<<<<<<<<<<<<< * * if not isinstance(format, bytes): */ - __pyx_tuple__17 = PyTuple_Pack(1, __pyx_kp_s_itemsize_0_for_cython_array); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(1, 136, __pyx_L1_error) + __pyx_tuple__17 = PyTuple_Pack(1, __pyx_kp_s_itemsize_0_for_cython_array); if (unlikely(!__pyx_tuple__17)) __PYX_ERR(1, 137, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - /* "View.MemoryView":148 + /* "View.MemoryView":149 * * if not self._shape: * raise MemoryError("unable to allocate shape and strides.") # <<<<<<<<<<<<<< * * */ - __pyx_tuple__18 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_shape_and_str); if (unlikely(!__pyx_tuple__18)) __PYX_ERR(1, 
148, __pyx_L1_error) + __pyx_tuple__18 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_shape_and_str); if (unlikely(!__pyx_tuple__18)) __PYX_ERR(1, 149, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - /* "View.MemoryView":176 + /* "View.MemoryView":177 * self.data = malloc(self.len) * if not self.data: * raise MemoryError("unable to allocate array data.") # <<<<<<<<<<<<<< * * if self.dtype_is_object: */ - __pyx_tuple__19 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_array_data); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(1, 176, __pyx_L1_error) + __pyx_tuple__19 = PyTuple_Pack(1, __pyx_kp_s_unable_to_allocate_array_data); if (unlikely(!__pyx_tuple__19)) __PYX_ERR(1, 177, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - /* "View.MemoryView":192 + /* "View.MemoryView":193 * bufmode = PyBUF_F_CONTIGUOUS | PyBUF_ANY_CONTIGUOUS * if not (flags & bufmode): * raise ValueError("Can only create a buffer that is contiguous in memory.") # <<<<<<<<<<<<<< * info.buf = self.data * info.len = self.len */ - __pyx_tuple__20 = PyTuple_Pack(1, __pyx_kp_s_Can_only_create_a_buffer_that_is); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(1, 192, __pyx_L1_error) + __pyx_tuple__20 = PyTuple_Pack(1, __pyx_kp_s_Can_only_create_a_buffer_that_is); if (unlikely(!__pyx_tuple__20)) __PYX_ERR(1, 193, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); @@ -24024,58 +24087,58 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__22); __Pyx_GIVEREF(__pyx_tuple__22); - /* "View.MemoryView":418 + /* "View.MemoryView":420 * def __setitem__(memoryview self, object index, object value): * if self.view.readonly: * raise TypeError("Cannot assign to read-only memoryview") # <<<<<<<<<<<<<< * * have_slices, index = _unellipsify(index, self.view.ndim) */ - __pyx_tuple__23 = PyTuple_Pack(1, __pyx_kp_s_Cannot_assign_to_read_only_memor); if (unlikely(!__pyx_tuple__23)) __PYX_ERR(1, 418, 
__pyx_L1_error) + __pyx_tuple__23 = PyTuple_Pack(1, __pyx_kp_s_Cannot_assign_to_read_only_memor); if (unlikely(!__pyx_tuple__23)) __PYX_ERR(1, 420, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__23); __Pyx_GIVEREF(__pyx_tuple__23); - /* "View.MemoryView":495 + /* "View.MemoryView":497 * result = struct.unpack(self.view.format, bytesitem) * except struct.error: * raise ValueError("Unable to convert item to object") # <<<<<<<<<<<<<< * else: * if len(self.view.format) == 1: */ - __pyx_tuple__24 = PyTuple_Pack(1, __pyx_kp_s_Unable_to_convert_item_to_object); if (unlikely(!__pyx_tuple__24)) __PYX_ERR(1, 495, __pyx_L1_error) + __pyx_tuple__24 = PyTuple_Pack(1, __pyx_kp_s_Unable_to_convert_item_to_object); if (unlikely(!__pyx_tuple__24)) __PYX_ERR(1, 497, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__24); __Pyx_GIVEREF(__pyx_tuple__24); - /* "View.MemoryView":520 + /* "View.MemoryView":522 * def __getbuffer__(self, Py_buffer *info, int flags): * if flags & PyBUF_WRITABLE and self.view.readonly: * raise ValueError("Cannot create writable memory view from read-only memoryview") # <<<<<<<<<<<<<< * * if flags & PyBUF_ND: */ - __pyx_tuple__25 = PyTuple_Pack(1, __pyx_kp_s_Cannot_create_writable_memory_vi); if (unlikely(!__pyx_tuple__25)) __PYX_ERR(1, 520, __pyx_L1_error) + __pyx_tuple__25 = PyTuple_Pack(1, __pyx_kp_s_Cannot_create_writable_memory_vi); if (unlikely(!__pyx_tuple__25)) __PYX_ERR(1, 522, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__25); __Pyx_GIVEREF(__pyx_tuple__25); - /* "View.MemoryView":570 + /* "View.MemoryView":572 * if self.view.strides == NULL: * * raise ValueError("Buffer view does not expose strides") # <<<<<<<<<<<<<< * * return tuple([stride for stride in self.view.strides[:self.view.ndim]]) */ - __pyx_tuple__26 = PyTuple_Pack(1, __pyx_kp_s_Buffer_view_does_not_expose_stri); if (unlikely(!__pyx_tuple__26)) __PYX_ERR(1, 570, __pyx_L1_error) + __pyx_tuple__26 = PyTuple_Pack(1, __pyx_kp_s_Buffer_view_does_not_expose_stri); if (unlikely(!__pyx_tuple__26)) __PYX_ERR(1, 
572, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__26); __Pyx_GIVEREF(__pyx_tuple__26); - /* "View.MemoryView":577 + /* "View.MemoryView":579 * def suboffsets(self): * if self.view.suboffsets == NULL: * return (-1,) * self.view.ndim # <<<<<<<<<<<<<< * * return tuple([suboffset for suboffset in self.view.suboffsets[:self.view.ndim]]) */ - __pyx_tuple__27 = PyTuple_New(1); if (unlikely(!__pyx_tuple__27)) __PYX_ERR(1, 577, __pyx_L1_error) + __pyx_tuple__27 = PyTuple_New(1); if (unlikely(!__pyx_tuple__27)) __PYX_ERR(1, 579, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__27); __Pyx_INCREF(__pyx_int_neg_1); __Pyx_GIVEREF(__pyx_int_neg_1); @@ -24101,25 +24164,25 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__29); __Pyx_GIVEREF(__pyx_tuple__29); - /* "View.MemoryView":682 + /* "View.MemoryView":684 * if item is Ellipsis: * if not seen_ellipsis: * result.extend([slice(None)] * (ndim - len(tup) + 1)) # <<<<<<<<<<<<<< * seen_ellipsis = True * else: */ - __pyx_slice__30 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__30)) __PYX_ERR(1, 682, __pyx_L1_error) + __pyx_slice__30 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__30)) __PYX_ERR(1, 684, __pyx_L1_error) __Pyx_GOTREF(__pyx_slice__30); __Pyx_GIVEREF(__pyx_slice__30); - /* "View.MemoryView":703 + /* "View.MemoryView":705 * for suboffset in suboffsets[:ndim]: * if suboffset >= 0: * raise ValueError("Indirect dimensions not supported") # <<<<<<<<<<<<<< * * */ - __pyx_tuple__31 = PyTuple_Pack(1, __pyx_kp_s_Indirect_dimensions_not_supporte); if (unlikely(!__pyx_tuple__31)) __PYX_ERR(1, 703, __pyx_L1_error) + __pyx_tuple__31 = PyTuple_Pack(1, __pyx_kp_s_Indirect_dimensions_not_supporte); if (unlikely(!__pyx_tuple__31)) __PYX_ERR(1, 705, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__31); __Pyx_GIVEREF(__pyx_tuple__31); @@ -24145,58 +24208,58 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__34); 
__Pyx_GIVEREF(__pyx_tuple__34); - /* "View.MemoryView":286 + /* "View.MemoryView":287 * return self.name * * cdef generic = Enum("") # <<<<<<<<<<<<<< * cdef strided = Enum("") # default * cdef indirect = Enum("") */ - __pyx_tuple__36 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct_or_indirect); if (unlikely(!__pyx_tuple__36)) __PYX_ERR(1, 286, __pyx_L1_error) + __pyx_tuple__36 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct_or_indirect); if (unlikely(!__pyx_tuple__36)) __PYX_ERR(1, 287, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__36); __Pyx_GIVEREF(__pyx_tuple__36); - /* "View.MemoryView":287 + /* "View.MemoryView":288 * * cdef generic = Enum("") * cdef strided = Enum("") # default # <<<<<<<<<<<<<< * cdef indirect = Enum("") * */ - __pyx_tuple__37 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct); if (unlikely(!__pyx_tuple__37)) __PYX_ERR(1, 287, __pyx_L1_error) + __pyx_tuple__37 = PyTuple_Pack(1, __pyx_kp_s_strided_and_direct); if (unlikely(!__pyx_tuple__37)) __PYX_ERR(1, 288, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__37); __Pyx_GIVEREF(__pyx_tuple__37); - /* "View.MemoryView":288 + /* "View.MemoryView":289 * cdef generic = Enum("") * cdef strided = Enum("") # default * cdef indirect = Enum("") # <<<<<<<<<<<<<< * * */ - __pyx_tuple__38 = PyTuple_Pack(1, __pyx_kp_s_strided_and_indirect); if (unlikely(!__pyx_tuple__38)) __PYX_ERR(1, 288, __pyx_L1_error) + __pyx_tuple__38 = PyTuple_Pack(1, __pyx_kp_s_strided_and_indirect); if (unlikely(!__pyx_tuple__38)) __PYX_ERR(1, 289, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__38); __Pyx_GIVEREF(__pyx_tuple__38); - /* "View.MemoryView":291 + /* "View.MemoryView":292 * * * cdef contiguous = Enum("") # <<<<<<<<<<<<<< * cdef indirect_contiguous = Enum("") * */ - __pyx_tuple__39 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_direct); if (unlikely(!__pyx_tuple__39)) __PYX_ERR(1, 291, __pyx_L1_error) + __pyx_tuple__39 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_direct); if (unlikely(!__pyx_tuple__39)) __PYX_ERR(1, 292, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_tuple__39); __Pyx_GIVEREF(__pyx_tuple__39); - /* "View.MemoryView":292 + /* "View.MemoryView":293 * * cdef contiguous = Enum("") * cdef indirect_contiguous = Enum("") # <<<<<<<<<<<<<< * * */ - __pyx_tuple__40 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_indirect); if (unlikely(!__pyx_tuple__40)) __PYX_ERR(1, 292, __pyx_L1_error) + __pyx_tuple__40 = PyTuple_Pack(1, __pyx_kp_s_contiguous_and_indirect); if (unlikely(!__pyx_tuple__40)) __PYX_ERR(1, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__40); __Pyx_GIVEREF(__pyx_tuple__40); @@ -24374,21 +24437,21 @@ static int __Pyx_modinit_type_init_code(void) { __pyx_ptype_9csimdjson___pyx_scope_struct_4_get_implementations = &__pyx_type_9csimdjson___pyx_scope_struct_4_get_implementations; __pyx_vtabptr_array = &__pyx_vtable_array; __pyx_vtable_array.get_memview = (PyObject *(*)(struct __pyx_array_obj *))__pyx_array_get_memview; - if (PyType_Ready(&__pyx_type___pyx_array) < 0) __PYX_ERR(1, 105, __pyx_L1_error) + if (PyType_Ready(&__pyx_type___pyx_array) < 0) __PYX_ERR(1, 106, __pyx_L1_error) #if PY_VERSION_HEX < 0x030800B1 __pyx_type___pyx_array.tp_print = 0; #endif - if (__Pyx_SetVtable(__pyx_type___pyx_array.tp_dict, __pyx_vtabptr_array) < 0) __PYX_ERR(1, 105, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_array) < 0) __PYX_ERR(1, 105, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_type___pyx_array.tp_dict, __pyx_vtabptr_array) < 0) __PYX_ERR(1, 106, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_array) < 0) __PYX_ERR(1, 106, __pyx_L1_error) __pyx_array_type = &__pyx_type___pyx_array; - if (PyType_Ready(&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(1, 279, __pyx_L1_error) + if (PyType_Ready(&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(1, 280, __pyx_L1_error) #if PY_VERSION_HEX < 0x030800B1 __pyx_type___pyx_MemviewEnum.tp_print = 0; #endif if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_MemviewEnum.tp_dictoffset && 
__pyx_type___pyx_MemviewEnum.tp_getattro == PyObject_GenericGetAttr)) { __pyx_type___pyx_MemviewEnum.tp_getattro = __Pyx_PyObject_GenericGetAttr; } - if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(1, 279, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_MemviewEnum) < 0) __PYX_ERR(1, 280, __pyx_L1_error) __pyx_MemviewEnum_type = &__pyx_type___pyx_MemviewEnum; __pyx_vtabptr_memoryview = &__pyx_vtable_memoryview; __pyx_vtable_memoryview.get_item_pointer = (char *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_get_item_pointer; @@ -24398,30 +24461,30 @@ static int __Pyx_modinit_type_init_code(void) { __pyx_vtable_memoryview.setitem_indexed = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject *, PyObject *))__pyx_memoryview_setitem_indexed; __pyx_vtable_memoryview.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryview_convert_item_to_object; __pyx_vtable_memoryview.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryview_assign_item_from_object; - if (PyType_Ready(&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(1, 330, __pyx_L1_error) + if (PyType_Ready(&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(1, 331, __pyx_L1_error) #if PY_VERSION_HEX < 0x030800B1 __pyx_type___pyx_memoryview.tp_print = 0; #endif if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_memoryview.tp_dictoffset && __pyx_type___pyx_memoryview.tp_getattro == PyObject_GenericGetAttr)) { __pyx_type___pyx_memoryview.tp_getattro = __Pyx_PyObject_GenericGetAttr; } - if (__Pyx_SetVtable(__pyx_type___pyx_memoryview.tp_dict, __pyx_vtabptr_memoryview) < 0) __PYX_ERR(1, 330, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(1, 330, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_type___pyx_memoryview.tp_dict, __pyx_vtabptr_memoryview) < 0) __PYX_ERR(1, 331, __pyx_L1_error) + if 
(__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryview) < 0) __PYX_ERR(1, 331, __pyx_L1_error) __pyx_memoryview_type = &__pyx_type___pyx_memoryview; __pyx_vtabptr__memoryviewslice = &__pyx_vtable__memoryviewslice; __pyx_vtable__memoryviewslice.__pyx_base = *__pyx_vtabptr_memoryview; __pyx_vtable__memoryviewslice.__pyx_base.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryviewslice_convert_item_to_object; __pyx_vtable__memoryviewslice.__pyx_base.assign_item_from_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *, PyObject *))__pyx_memoryviewslice_assign_item_from_object; __pyx_type___pyx_memoryviewslice.tp_base = __pyx_memoryview_type; - if (PyType_Ready(&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(1, 965, __pyx_L1_error) + if (PyType_Ready(&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(1, 967, __pyx_L1_error) #if PY_VERSION_HEX < 0x030800B1 __pyx_type___pyx_memoryviewslice.tp_print = 0; #endif if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_type___pyx_memoryviewslice.tp_dictoffset && __pyx_type___pyx_memoryviewslice.tp_getattro == PyObject_GenericGetAttr)) { __pyx_type___pyx_memoryviewslice.tp_getattro = __Pyx_PyObject_GenericGetAttr; } - if (__Pyx_SetVtable(__pyx_type___pyx_memoryviewslice.tp_dict, __pyx_vtabptr__memoryviewslice) < 0) __PYX_ERR(1, 965, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(1, 965, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_type___pyx_memoryviewslice.tp_dict, __pyx_vtabptr__memoryviewslice) < 0) __PYX_ERR(1, 967, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject*)&__pyx_type___pyx_memoryviewslice) < 0) __PYX_ERR(1, 967, __pyx_L1_error) __pyx_memoryviewslice_type = &__pyx_type___pyx_memoryviewslice; __Pyx_RefNannyFinishContext(); return 0; @@ -24812,90 +24875,90 @@ if (!__Pyx_RefNanny) { if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_4) < 0) __PYX_ERR(0, 1, __pyx_L1_error) 
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":209 + /* "View.MemoryView":210 * info.obj = self * * __pyx_getbuffer = capsule( &__pyx_array_getbuffer, "getbuffer(obj, view, flags)") # <<<<<<<<<<<<<< * * def __dealloc__(array self): */ - __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_array_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 209, __pyx_L1_error) + __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_array_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 210, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem((PyObject *)__pyx_array_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 209, __pyx_L1_error) + if (PyDict_SetItem((PyObject *)__pyx_array_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 210, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; PyType_Modified(__pyx_array_type); - /* "View.MemoryView":286 + /* "View.MemoryView":287 * return self.name * * cdef generic = Enum("") # <<<<<<<<<<<<<< * cdef strided = Enum("") # default * cdef indirect = Enum("") */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__36, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 286, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__36, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 287, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XGOTREF(generic); __Pyx_DECREF_SET(generic, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":287 + /* "View.MemoryView":288 * * cdef generic = Enum("") * cdef strided = Enum("") # default # <<<<<<<<<<<<<< * cdef indirect = Enum("") * */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__37, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 287, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), 
__pyx_tuple__37, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 288, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XGOTREF(strided); __Pyx_DECREF_SET(strided, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":288 + /* "View.MemoryView":289 * cdef generic = Enum("") * cdef strided = Enum("") # default * cdef indirect = Enum("") # <<<<<<<<<<<<<< * * */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__38, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 288, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__38, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 289, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XGOTREF(indirect); __Pyx_DECREF_SET(indirect, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":291 + /* "View.MemoryView":292 * * * cdef contiguous = Enum("") # <<<<<<<<<<<<<< * cdef indirect_contiguous = Enum("") * */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__39, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 291, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__39, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XGOTREF(contiguous); __Pyx_DECREF_SET(contiguous, __pyx_t_4); __Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":292 + /* "View.MemoryView":293 * * cdef contiguous = Enum("") * cdef indirect_contiguous = Enum("") # <<<<<<<<<<<<<< * * */ - __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__40, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 292, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(((PyObject *)__pyx_MemviewEnum_type), __pyx_tuple__40, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_XGOTREF(indirect_contiguous); __Pyx_DECREF_SET(indirect_contiguous, __pyx_t_4); 
__Pyx_GIVEREF(__pyx_t_4); __pyx_t_4 = 0; - /* "View.MemoryView":316 + /* "View.MemoryView":317 * * DEF THREAD_LOCKS_PREALLOCATED = 8 * cdef int __pyx_memoryview_thread_locks_used = 0 # <<<<<<<<<<<<<< @@ -24904,7 +24967,7 @@ if (!__Pyx_RefNanny) { */ __pyx_memoryview_thread_locks_used = 0; - /* "View.MemoryView":317 + /* "View.MemoryView":318 * DEF THREAD_LOCKS_PREALLOCATED = 8 * cdef int __pyx_memoryview_thread_locks_used = 0 * cdef PyThread_type_lock[THREAD_LOCKS_PREALLOCATED] __pyx_memoryview_thread_locks = [ # <<<<<<<<<<<<<< @@ -24921,29 +24984,29 @@ if (!__Pyx_RefNanny) { __pyx_t_5[7] = PyThread_allocate_lock(); memcpy(&(__pyx_memoryview_thread_locks[0]), __pyx_t_5, sizeof(__pyx_memoryview_thread_locks[0]) * (8)); - /* "View.MemoryView":549 + /* "View.MemoryView":551 * info.obj = self * * __pyx_getbuffer = capsule( &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)") # <<<<<<<<<<<<<< * * */ - __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 549, __pyx_L1_error) + __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 551, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem((PyObject *)__pyx_memoryview_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 549, __pyx_L1_error) + if (PyDict_SetItem((PyObject *)__pyx_memoryview_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 551, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; PyType_Modified(__pyx_memoryview_type); - /* "View.MemoryView":995 + /* "View.MemoryView":997 * return self.from_object * * __pyx_getbuffer = capsule( &__pyx_memoryview_getbuffer, "getbuffer(obj, view, flags)") # <<<<<<<<<<<<<< * * */ - __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) 
__PYX_ERR(1, 995, __pyx_L1_error) + __pyx_t_4 = __pyx_capsule_create(((void *)(&__pyx_memoryview_getbuffer)), ((char *)"getbuffer(obj, view, flags)")); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 997, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem((PyObject *)__pyx_memoryviewslice_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 995, __pyx_L1_error) + if (PyDict_SetItem((PyObject *)__pyx_memoryviewslice_type->tp_dict, __pyx_n_s_pyx_getbuffer, __pyx_t_4) < 0) __PYX_ERR(1, 997, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; PyType_Modified(__pyx_memoryviewslice_type); @@ -25981,7 +26044,7 @@ __Pyx_init_memviewslice(struct __pyx_memoryview_obj *memview, static void __pyx_fatalerror(const char *fmt, ...) Py_NO_RETURN { va_list vargs; char msg[200]; -#ifdef HAVE_STDARG_PROTOTYPES +#if PY_VERSION_HEX >= 0x030A0000 || defined(HAVE_STDARG_PROTOTYPES) va_start(vargs, fmt); #else va_start(vargs); @@ -27163,7 +27226,7 @@ static PyObject* __Pyx_PyUnicode_BuildFromAscii(Py_ssize_t ulength, char* chars, padding = PyUnicode_FromOrdinal(padding_char); if (likely(padding) && uoffset > prepend_sign + 1) { PyObject *tmp; - PyObject *repeat = PyInt_FromSize_t(uoffset - prepend_sign); + PyObject *repeat = PyInt_FromSsize_t(uoffset - prepend_sign); if (unlikely(!repeat)) goto done_or_error; tmp = PyNumber_Multiply(padding, repeat); Py_DECREF(repeat); diff --git a/simdjson/patches/README b/simdjson/patches/README new file mode 100644 index 0000000..6b1f495 --- /dev/null +++ b/simdjson/patches/README @@ -0,0 +1,5 @@ +Patches to be applied to simdjson library files. See simdjson/simdjson_source/README + +To make a new patch file: + * Get the patch contents: for f in simdjson.h simdjson.cpp; do diff -u HEAD:simdjson/simdjson_source/$f simdjson/$f; done + * Replace in the .patch file, retaining the explanation comment at the top. Make sure patch is newline terminated.
diff --git a/simdjson/patches/apply.sh b/simdjson/patches/apply.sh new file mode 100755 index 0000000..73d3659 --- /dev/null +++ b/simdjson/patches/apply.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +cd "$(dirname "$0")/../.." +cp simdjson/simdjson_source/simdjson.{h,cpp} simdjson/ +patch -ruN -p0 < simdjson/patches/float-aware-minify.patch diff --git a/simdjson/patches/float-aware-minify.patch b/simdjson/patches/float-aware-minify.patch new file mode 100644 index 0000000..409e1f8 --- /dev/null +++ b/simdjson/patches/float-aware-minify.patch @@ -0,0 +1,42 @@ + + +simdjson minify drops the trailing '.0' from floats, which is fine by JSON spec, +but matters in practice. For example, Elasticsearch dynamic field type detection +is affected. In general, Python distinguishes between int and float, so various +type guarantees may fail. The dump/load cycle should not convert types for a few +byte gain. Let users explicitly convert types, if they need to. + +This modifies minify, so it does not drop the '.0'. + +Note: simdjson started dropping '.0' with d0821adf0e7934f27a8eb5c2fe9b8254e4. + + + +--- simdjson/simdjson_source/simdjson.cpp 2022-11-22 09:20:33.402000000 +0200 ++++ simdjson/simdjson.cpp 2022-11-22 09:19:27.854085130 +0200 +@@ -868,9 +868,9 @@ + + std::memset(buf + k, '0', static_cast(n) - static_cast(k)); + // Make it look like a floating-point number (#362, #378) +- // buf[n + 0] = '.'; +- // buf[n + 1] = '0'; +- return buf + (static_cast(n)); ++ buf[n + 0] = '.'; ++ buf[n + 1] = '0'; ++ return buf + (static_cast(n) + 2U); + } + + if (0 < n && n <= max_exp) { +@@ -933,10 +933,8 @@ + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) +- if(negative) { +- *first++ = '.'; +- *first++ = '0'; +- } ++ *first++ = '.'; ++ *first++ = '0'; + return first; + } + // Compute v = buffer * 10^decimal_exponent. 
diff --git a/simdjson/simdjson.cpp b/simdjson/simdjson.cpp index c98c4b1..edcec47 100644 --- a/simdjson/simdjson.cpp +++ b/simdjson/simdjson.cpp @@ -868,9 +868,9 @@ inline char *format_buffer(char *buf, int len, int decimal_exponent, std::memset(buf + k, '0', static_cast(n) - static_cast(k)); // Make it look like a floating-point number (#362, #378) - // buf[n + 0] = '.'; - // buf[n + 1] = '0'; - return buf + (static_cast(n)); + buf[n + 0] = '.'; + buf[n + 1] = '0'; + return buf + (static_cast(n) + 2U); } if (0 < n && n <= max_exp) { @@ -933,10 +933,8 @@ char *to_chars(char *first, const char *last, double value) { { *first++ = '0'; // Make it look like a floating-point number (#362, #378) - if(negative) { - *first++ = '.'; - *first++ = '0'; - } + *first++ = '.'; + *first++ = '0'; return first; } // Compute v = buffer * 10^decimal_exponent. diff --git a/simdjson/simdjson_source/README b/simdjson/simdjson_source/README new file mode 100644 index 0000000..499cab1 --- /dev/null +++ b/simdjson/simdjson_source/README @@ -0,0 +1,7 @@ +singleheader/ files from the simdjson library. These are not used directly. +Patch-applied files are used instead, in the simdjson/ directory. + +To update: + * Copy from simdjson: https://github.com/simdjson/simdjson/tree/master/singleheader + * Run simdjson/patches/apply.sh + * Increase pysimdjson version in setup.py diff --git a/simdjson/simdjson_source/simdjson.cpp b/simdjson/simdjson_source/simdjson.cpp new file mode 100644 index 0000000..c98c4b1 --- /dev/null +++ b/simdjson/simdjson_source/simdjson.cpp @@ -0,0 +1,15051 @@ +/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ +/* begin file src/simdjson.cpp */ +#include "simdjson.h" + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +/* begin file src/to_chars.cpp */ +#include +#include +#include +#include + +namespace simdjson { +namespace internal { +/*! +implements the Grisu2 algorithm for binary to decimal floating-point +conversion. 
+Adapted from JSON for Modern C++ + +This implementation is a slightly modified version of the reference +implementation which may be obtained from +http://florian.loitsch.com/publications (bench.tar.gz). +The code is distributed under the MIT license, Copyright (c) 2009 Florian +Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing +Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the +ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation, +PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and +Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming +Language Design and Implementation, PLDI 1996 +*/ +namespace dtoa_impl { + +template +Target reinterpret_bits(const Source source) { + static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); + + Target target; + std::memcpy(&target, &source, sizeof(Source)); + return target; +} + +struct diyfp // f * 2^e +{ + static constexpr int kPrecision = 64; // = q + + std::uint64_t f = 0; + int e = 0; + + constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} + + /*! + @brief returns x - y + @pre x.e == y.e and x.f >= y.f + */ + static diyfp sub(const diyfp &x, const diyfp &y) noexcept { + + return {x.f - y.f, x.e}; + } + + /*! + @brief returns x * y + @note The result is rounded. (Only the upper q bits are returned.) 
+ */ + static diyfp mul(const diyfp &x, const diyfp &y) noexcept { + static_assert(kPrecision == 64, "internal error"); + + // Computes: + // f = round((x.f * y.f) / 2^q) + // e = x.e + y.e + q + + // Emulate the 64-bit * 64-bit multiplication: + // + // p = u * v + // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) + // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + + // 2^64 (u_hi v_hi ) = (p0 ) + 2^32 ((p1 ) + (p2 )) + // + 2^64 (p3 ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + + // 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) = + // (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + + // p2_hi + p3) = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) + + // 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) + // + // (Since Q might be larger than 2^32 - 1) + // + // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) + // + // (Q_hi + H does not overflow a 64-bit int) + // + // = p_lo + 2^64 p_hi + + const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; + const std::uint64_t u_hi = x.f >> 32u; + const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; + const std::uint64_t v_hi = y.f >> 32u; + + const std::uint64_t p0 = u_lo * v_lo; + const std::uint64_t p1 = u_lo * v_hi; + const std::uint64_t p2 = u_hi * v_lo; + const std::uint64_t p3 = u_hi * v_hi; + + const std::uint64_t p0_hi = p0 >> 32u; + const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; + const std::uint64_t p1_hi = p1 >> 32u; + const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; + const std::uint64_t p2_hi = p2 >> 32u; + + std::uint64_t Q = p0_hi + p1_lo + p2_lo; + + // The full product might now be computed as + // + // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) + // p_lo = p0_lo + (Q << 32) + // + // But in this particular case here, the full p_lo is not required. + // Effectively we only need to add the highest bit in p_lo to p_hi (and + // Q_hi + 1 does not overflow). + + Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up + + const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); + + return {h, x.e + y.e + 64}; + } + + /*! 
+ @brief normalize x such that the significand is >= 2^(q-1) + @pre x.f != 0 + */ + static diyfp normalize(diyfp x) noexcept { + + while ((x.f >> 63u) == 0) { + x.f <<= 1u; + x.e--; + } + + return x; + } + + /*! + @brief normalize x such that the result has the exponent E + @pre e >= x.e and the upper e - x.e bits of x.f must be zero. + */ + static diyfp normalize_to(const diyfp &x, + const int target_exponent) noexcept { + const int delta = x.e - target_exponent; + + return {x.f << delta, target_exponent}; + } +}; + +struct boundaries { + diyfp w; + diyfp minus; + diyfp plus; +}; + +/*! +Compute the (normalized) diyfp representing the input number 'value' and its +boundaries. +@pre value must be finite and positive +*/ +template boundaries compute_boundaries(FloatType value) { + + // Convert the IEEE representation into a diyfp. + // + // If v is denormal: + // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) + // If v is normalized: + // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) + + static_assert(std::numeric_limits::is_iec559, + "internal error: dtoa_short requires an IEEE-754 " + "floating-point implementation"); + + constexpr int kPrecision = + std::numeric_limits::digits; // = p (includes the hidden bit) + constexpr int kBias = + std::numeric_limits::max_exponent - 1 + (kPrecision - 1); + constexpr int kMinExp = 1 - kBias; + constexpr std::uint64_t kHiddenBit = std::uint64_t{1} + << (kPrecision - 1); // = 2^(p-1) + + using bits_type = typename std::conditional::type; + + const std::uint64_t bits = reinterpret_bits(value); + const std::uint64_t E = bits >> (kPrecision - 1); + const std::uint64_t F = bits & (kHiddenBit - 1); + + const bool is_denormal = E == 0; + const diyfp v = is_denormal + ? diyfp(F, kMinExp) + : diyfp(F + kHiddenBit, static_cast(E) - kBias); + + // Compute the boundaries m- and m+ of the floating-point value + // v = f * 2^e. 
+ // + // Determine v- and v+, the floating-point predecessor and successor if v, + // respectively. + // + // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) + // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) + // + // v+ = v + 2^e + // + // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ + // between m- and m+ round to v, regardless of how the input rounding + // algorithm breaks ties. + // + // ---+-------------+-------------+-------------+-------------+--- (A) + // v- m- v m+ v+ + // + // -----------------+------+------+-------------+-------------+--- (B) + // v- m- v m+ v+ + + const bool lower_boundary_is_closer = F == 0 && E > 1; + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_minus = lower_boundary_is_closer + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) + + // Determine the normalized w+ = m+. + const diyfp w_plus = diyfp::normalize(m_plus); + + // Determine w- = m- such that e_(w-) = e_(w+). + const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); + + return {diyfp::normalize(v), w_minus, w_plus}; +} + +// Given normalized diyfp w, Grisu needs to find a (normalized) cached +// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies +// within a certain range [alpha, gamma] (Definition 3.2 from [1]) +// +// alpha <= e = e_c + e_w + q <= gamma +// +// or +// +// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q +// <= f_c * f_w * 2^gamma +// +// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies +// +// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma +// +// or +// +// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) +// +// The choice of (alpha,gamma) determines the size of the table and the form of +// the digit generation procedure. 
Using (alpha,gamma)=(-60,-32) works out well +// in practice: +// +// The idea is to cut the number c * w = f * 2^e into two parts, which can be +// processed independently: An integral part p1, and a fractional part p2: +// +// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e +// = (f div 2^-e) + (f mod 2^-e) * 2^e +// = p1 + p2 * 2^e +// +// The conversion of p1 into decimal form requires a series of divisions and +// modulos by (a power of) 10. These operations are faster for 32-bit than for +// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be +// achieved by choosing +// +// -e >= 32 or e <= -32 := gamma +// +// In order to convert the fractional part +// +// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... +// +// into decimal form, the fraction is repeatedly multiplied by 10 and the digits +// d[-i] are extracted in order: +// +// (10 * p2) div 2^-e = d[-1] +// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... +// +// The multiplication by 10 must not overflow. It is sufficient to choose +// +// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. +// +// Since p2 = f mod 2^-e < 2^-e, +// +// -e <= 60 or e >= -60 := alpha + +constexpr int kAlpha = -60; +constexpr int kGamma = -32; + +struct cached_power // c = f * 2^e ~= 10^k +{ + std::uint64_t f; + int e; + int k; +}; + +/*! +For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached +power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c +satisfies (Definition 3.2 from [1]) + alpha <= e_c + e + q <= gamma. +*/ +inline cached_power get_cached_power_for_binary_exponent(int e) { + // Now + // + // alpha <= e_c + e + q <= gamma (1) + // ==> f_c * 2^alpha <= c * 2^e * 2^q + // + // and since the c's are normalized, 2^(q-1) <= f_c, + // + // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) + // ==> 2^(alpha - e - 1) <= c + // + // If c were an exact power of ten, i.e. 
c = 10^k, one may determine k as + // + // k = ceil( log_10( 2^(alpha - e - 1) ) ) + // = ceil( (alpha - e - 1) * log_10(2) ) + // + // From the paper: + // "In theory the result of the procedure could be wrong since c is rounded, + // and the computation itself is approximated [...]. In practice, however, + // this simple function is sufficient." + // + // For IEEE double precision floating-point numbers converted into + // normalized diyfp's w = f * 2^e, with q = 64, + // + // e >= -1022 (min IEEE exponent) + // -52 (p - 1) + // -52 (p - 1, possibly normalize denormal IEEE numbers) + // -11 (normalize the diyfp) + // = -1137 + // + // and + // + // e <= +1023 (max IEEE exponent) + // -52 (p - 1) + // -11 (normalize the diyfp) + // = 960 + // + // This binary exponent range [-1137,960] results in a decimal exponent + // range [-307,324]. One does not need to store a cached power for each + // k in this range. For each such k it suffices to find a cached power + // such that the exponent of the product lies in [alpha,gamma]. + // This implies that the difference of the decimal exponents of adjacent + // table entries must be less than or equal to + // + // floor( (gamma - alpha) * log_10(2) ) = 8. + // + // (A smaller distance gamma-alpha would require a larger table.) + + // NB: + // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. 
+ + constexpr int kCachedPowersMinDecExp = -300; + constexpr int kCachedPowersDecStep = 8; + + static constexpr std::array kCachedPowers = {{ + {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292}, + {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276}, + {0xD3515C2831559A83, -954, -268}, {0x9D71AC8FADA6C9B5, -927, -260}, + {0xEA9C227723EE8BCB, -901, -252}, {0xAECC49914078536D, -874, -244}, + {0x823C12795DB6CE57, -847, -236}, {0xC21094364DFB5637, -821, -228}, + {0x9096EA6F3848984F, -794, -220}, {0xD77485CB25823AC7, -768, -212}, + {0xA086CFCD97BF97F4, -741, -204}, {0xEF340A98172AACE5, -715, -196}, + {0xB23867FB2A35B28E, -688, -188}, {0x84C8D4DFD2C63F3B, -661, -180}, + {0xC5DD44271AD3CDBA, -635, -172}, {0x936B9FCEBB25C996, -608, -164}, + {0xDBAC6C247D62A584, -582, -156}, {0xA3AB66580D5FDAF6, -555, -148}, + {0xF3E2F893DEC3F126, -529, -140}, {0xB5B5ADA8AAFF80B8, -502, -132}, + {0x87625F056C7C4A8B, -475, -124}, {0xC9BCFF6034C13053, -449, -116}, + {0x964E858C91BA2655, -422, -108}, {0xDFF9772470297EBD, -396, -100}, + {0xA6DFBD9FB8E5B88F, -369, -92}, {0xF8A95FCF88747D94, -343, -84}, + {0xB94470938FA89BCF, -316, -76}, {0x8A08F0F8BF0F156B, -289, -68}, + {0xCDB02555653131B6, -263, -60}, {0x993FE2C6D07B7FAC, -236, -52}, + {0xE45C10C42A2B3B06, -210, -44}, {0xAA242499697392D3, -183, -36}, + {0xFD87B5F28300CA0E, -157, -28}, {0xBCE5086492111AEB, -130, -20}, + {0x8CBCCC096F5088CC, -103, -12}, {0xD1B71758E219652C, -77, -4}, + {0x9C40000000000000, -50, 4}, {0xE8D4A51000000000, -24, 12}, + {0xAD78EBC5AC620000, 3, 20}, {0x813F3978F8940984, 30, 28}, + {0xC097CE7BC90715B3, 56, 36}, {0x8F7E32CE7BEA5C70, 83, 44}, + {0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60}, + {0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76}, + {0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92}, + {0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108}, + {0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124}, + {0xB454E4A179DD1877, 375, 
132}, {0x865B86925B9BC5C2, 402, 140}, + {0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156}, + {0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172}, + {0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188}, + {0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204}, + {0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220}, + {0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236}, + {0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252}, + {0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268}, + {0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284}, + {0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300}, + {0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316}, + {0x9E19DB92B4E31BA9, 1013, 324}, + }}; + + // This computation gives exactly the same results for k as + // k = ceil((kAlpha - e - 1) * 0.30102999566398114) + // for |e| <= 1500, but doesn't require floating-point operations. + // NB: log_10(2) ~= 78913 / 2^18 + const int f = kAlpha - e - 1; + const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); + + const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / + kCachedPowersDecStep; + + const cached_power cached = kCachedPowers[static_cast(index)]; + + return cached; +} + +/*! +For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. +For n == 0, returns 1 and sets pow10 := 1. 
+*/ +inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) { + // LCOV_EXCL_START + if (n >= 1000000000) { + pow10 = 1000000000; + return 10; + } + // LCOV_EXCL_STOP + else if (n >= 100000000) { + pow10 = 100000000; + return 9; + } else if (n >= 10000000) { + pow10 = 10000000; + return 8; + } else if (n >= 1000000) { + pow10 = 1000000; + return 7; + } else if (n >= 100000) { + pow10 = 100000; + return 6; + } else if (n >= 10000) { + pow10 = 10000; + return 5; + } else if (n >= 1000) { + pow10 = 1000; + return 4; + } else if (n >= 100) { + pow10 = 100; + return 3; + } else if (n >= 10) { + pow10 = 10; + return 2; + } else { + pow10 = 1; + return 1; + } +} + +inline void grisu2_round(char *buf, int len, std::uint64_t dist, + std::uint64_t delta, std::uint64_t rest, + std::uint64_t ten_k) { + + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // ten_k + // <------> + // <---- rest ----> + // --------------[------------------+----+--------------]-------------- + // w V + // = buf * 10^k + // + // ten_k represents a unit-in-the-last-place in the decimal representation + // stored in buf. + // Decrement buf by ten_k while this takes buf closer to w. + + // The tests are written in this order to avoid overflow in unsigned + // integer arithmetic. + + while (rest < dist && delta - rest >= ten_k && + (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { + buf[len - 1]--; + rest += ten_k; + } +} + +/*! +Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. +M- and M+ must be normalized and share the same exponent -60 <= e <= -32. 
+*/ +inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent, + diyfp M_minus, diyfp w, diyfp M_plus) { + static_assert(kAlpha >= -60, "internal error"); + static_assert(kGamma <= -32, "internal error"); + + // Generates the digits (and the exponent) of a decimal floating-point + // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's + // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= + // gamma. + // + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // Grisu2 generates the digits of M+ from left to right and stops as soon as + // V is in [M-,M+]. + + std::uint64_t delta = + diyfp::sub(M_plus, M_minus) + .f; // (significand of (M+ - M-), implicit exponent is e) + std::uint64_t dist = + diyfp::sub(M_plus, w) + .f; // (significand of (M+ - w ), implicit exponent is e) + + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e + + const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); + + auto p1 = static_cast( + M_plus.f >> + -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
+ std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e + + // 1) + // + // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] + + std::uint32_t pow10; + const int k = find_largest_pow10(p1, pow10); + + // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) + // + // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) + // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) + // + // M+ = p1 + p2 * 2^e + // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e + // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e + // = d[k-1] * 10^(k-1) + ( rest) * 2^e + // + // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) + // + // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] + // + // but stop as soon as + // + // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e + + int n = k; + while (n > 0) { + // Invariants: + // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) + // pow10 = 10^(n-1) <= p1 < 10^n + // + const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) + const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) + // + // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e + // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) + // + p1 = r; + n--; + // + // M+ = buffer * 10^n + (p1 + p2 * 2^e) + // pow10 = 10^n + // + + // Now check if enough digits have been generated. + // Compute + // + // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e + // + // Note: + // Since rest and delta share the same exponent e, it suffices to + // compare the significands. + const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; + if (rest <= delta) { + // V = buffer * 10^n, with M- <= V <= M+. + + decimal_exponent += n; + + // We may now just stop. But instead look if the buffer could be + // decremented to bring V closer to w. + // + // pow10 = 10^n is now 1 ulp in the decimal representation V. 
+ // The rounding procedure works with diyfp's with an implicit + // exponent of e. + // + // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e + // + const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; + grisu2_round(buffer, length, dist, delta, rest, ten_n); + + return; + } + + pow10 /= 10; + // + // pow10 = 10^(n-1) <= p1 < 10^n + // Invariants restored. + } + + // 2) + // + // The digits of the integral part have been generated: + // + // M+ = d[k-1]...d[1]d[0] + p2 * 2^e + // = buffer + p2 * 2^e + // + // Now generate the digits of the fractional part p2 * 2^e. + // + // Note: + // No decimal point is generated: the exponent is adjusted instead. + // + // p2 actually represents the fraction + // + // p2 * 2^e + // = p2 / 2^-e + // = d[-1] / 10^1 + d[-2] / 10^2 + ... + // + // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) + // + // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m + // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) + // + // using + // + // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) + // = ( d) * 2^-e + ( r) + // + // or + // 10^m * p2 * 2^e = d + r * 2^e + // + // i.e. + // + // M+ = buffer + p2 * 2^e + // = buffer + 10^-m * (d + r * 2^e) + // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e + // + // and stop as soon as 10^-m * r * 2^e <= delta * 2^e + + int m = 0; + for (;;) { + // Invariant: + // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) 
+ // * 2^e + // = buffer * 10^-m + 10^-m * (p2 ) + // * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e = + // buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + + // (10*p2 mod 2^-e)) * 2^e + // + p2 *= 10; + const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e + const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e + // + // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) + // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + p2 = r; + m++; + // + // M+ = buffer * 10^-m + 10^-m * p2 * 2^e + // Invariant restored. + + // Check if enough digits have been generated. + // + // 10^-m * p2 * 2^e <= delta * 2^e + // p2 * 2^e <= 10^m * delta * 2^e + // p2 <= 10^m * delta + delta *= 10; + dist *= 10; + if (p2 <= delta) { + break; + } + } + + // V = buffer * 10^-m, with M- <= V <= M+. + + decimal_exponent -= m; + + // 1 ulp in the decimal representation is now 10^-m. + // Since delta and dist are now scaled by 10^m, we need to do the + // same with ulp in order to keep the units in sync. + // + // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e + // + const std::uint64_t ten_m = one.f; + grisu2_round(buffer, length, dist, delta, p2, ten_m); + + // By construction this algorithm generates the shortest possible decimal + // number (Loitsch, Theorem 6.2) which rounds back to w. + // For an input number of precision p, at least + // + // N = 1 + ceil(p * log_10(2)) + // + // decimal digits are sufficient to identify all binary floating-point + // numbers (Matula, "In-and-Out conversions"). + // This implies that the algorithm does not produce more than N decimal + // digits. + // + // N = 17 for p = 53 (IEEE double precision) + // N = 9 for p = 24 (IEEE single precision) +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus, + diyfp v, diyfp m_plus) { + + // --------(-----------------------+-----------------------)-------- (A) + // m- v m+ + // + // --------------------(-----------+-----------------------)-------- (B) + // m- v m+ + // + // First scale v (and m- and m+) such that the exponent is in the range + // [alpha, gamma]. + + const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); + + const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k + + // The exponent of the products is = v.e + c_minus_k.e + q and is in the range + // [alpha,gamma] + const diyfp w = diyfp::mul(v, c_minus_k); + const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); + const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); + + // ----(---+---)---------------(---+---)---------------(---+---)---- + // w- w w+ + // = c*m- = c*v = c*m+ + // + // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and + // w+ are now off by a small amount. + // In fact: + // + // w - v * 10^k < 1 ulp + // + // To account for this inaccuracy, add resp. subtract 1 ulp. + // + // --------+---[---------------(---+---)---------------]---+-------- + // w- M- w M+ w+ + // + // Now any number in [M-, M+] (bounds included) will round to w when input, + // regardless of how the input rounding algorithm breaks ties. + // + // And digit_gen generates the shortest possible such number in [M-, M+]. + // Note that this does not mean that Grisu2 always generates the shortest + // possible number in the interval (m-, m+). + const diyfp M_minus(w_minus.f + 1, w_minus.e); + const diyfp M_plus(w_plus.f - 1, w_plus.e); + + decimal_exponent = -cached.k; // = -(-k) = k + + grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +template +void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { + static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, + "internal error: not enough precision"); + + // If the neighbors (and boundaries) of 'value' are always computed for + // double-precision numbers, all float's can be recovered using strtod (and + // strtof). However, the resulting decimal representations are not exactly + // "short". + // + // The documentation for 'std::to_chars' + // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is + // converted to a string as if by std::sprintf in the default ("C") locale" + // and since sprintf promotes float's to double's, I think this is exactly + // what 'std::to_chars' does. On the other hand, the documentation for + // 'std::to_chars' requires that "parsing the representation using the + // corresponding std::from_chars function recovers value exactly". That + // indicates that single precision floating-point numbers should be recovered + // using 'std::strtof'. + // + // NB: If the neighbors are computed for single-precision numbers, there is a + // single float + // (7.0385307e-26f) which can't be recovered using strtod. The resulting + // double precision value is off by 1 ulp. +#if 0 + const boundaries w = compute_boundaries(static_cast(value)); +#else + const boundaries w = compute_boundaries(value); +#endif + + grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); +} + +/*! +@brief appends a decimal representation of e to buf +@return a pointer to the element following the exponent. +@pre -1000 < e < 1000 +*/ +inline char *append_exponent(char *buf, int e) { + + if (e < 0) { + e = -e; + *buf++ = '-'; + } else { + *buf++ = '+'; + } + + auto k = static_cast(e); + if (k < 10) { + // Always print at least two digits in the exponent. 
+ // This is for compatibility with printf("%g"). + *buf++ = '0'; + *buf++ = static_cast('0' + k); + } else if (k < 100) { + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } else { + *buf++ = static_cast('0' + k / 100); + k %= 100; + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } + + return buf; +} + +/*! +@brief prettify v = buf * 10^decimal_exponent +If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point +notation. Otherwise it will be printed in exponential notation. +@pre min_exp < 0 +@pre max_exp > 0 +*/ +inline char *format_buffer(char *buf, int len, int decimal_exponent, + int min_exp, int max_exp) { + + const int k = len; + const int n = len + decimal_exponent; + + // v = buf * 10^(n-k) + // k is the length of the buffer (number of decimal digits) + // n is the position of the decimal point relative to the start of the buffer. + + if (k <= n && n <= max_exp) { + // digits[000] + // len <= max_exp + 2 + + std::memset(buf + k, '0', static_cast(n) - static_cast(k)); + // Make it look like a floating-point number (#362, #378) + // buf[n + 0] = '.'; + // buf[n + 1] = '0'; + return buf + (static_cast(n)); + } + + if (0 < n && n <= max_exp) { + // dig.its + // len <= max_digits10 + 1 + std::memmove(buf + (static_cast(n) + 1), buf + n, + static_cast(k) - static_cast(n)); + buf[n] = '.'; + return buf + (static_cast(k) + 1U); + } + + if (min_exp < n && n <= 0) { + // 0.[000]digits + // len <= 2 + (-min_exp - 1) + max_digits10 + + std::memmove(buf + (2 + static_cast(-n)), buf, + static_cast(k)); + buf[0] = '0'; + buf[1] = '.'; + std::memset(buf + 2, '0', static_cast(-n)); + return buf + (2U + static_cast(-n) + static_cast(k)); + } + + if (k == 1) { + // dE+123 + // len <= 1 + 5 + + buf += 1; + } else { + // d.igitsE+123 + // len <= max_digits10 + 1 + 5 + + std::memmove(buf + 2, buf + 1, static_cast(k) - 1); + buf[1] = '.'; + buf += 1 + static_cast(k); + } + + *buf++ = 'e'; 
+ return append_exponent(buf, n - 1); +} + +} // namespace dtoa_impl + +/*! +The format of the resulting decimal representation is similar to printf's %g +format. Returns an iterator pointing past-the-end of the decimal representation. +@note The input number must be finite, i.e. NaN's and Inf's are not supported. +@note The buffer must be large enough. +@note The result is NOT null-terminated. +*/ +char *to_chars(char *first, const char *last, double value) { + static_cast(last); // maybe unused - fix warning + bool negative = std::signbit(value); + if (negative) { + value = -value; + *first++ = '-'; + } + + if (value == 0) // +-0 + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) + if(negative) { + *first++ = '.'; + *first++ = '0'; + } + return first; + } + // Compute v = buffer * 10^decimal_exponent. + // The decimal digits are stored in the buffer, which needs to be interpreted + // as an unsigned decimal integer. + // len is the length of the buffer, i.e. the number of decimal digits. + int len = 0; + int decimal_exponent = 0; + dtoa_impl::grisu2(first, len, decimal_exponent, value); + // Format the buffer like printf("%.*g", prec, value) + constexpr int kMinExp = -4; + constexpr int kMaxExp = std::numeric_limits::digits10; + + return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, + kMaxExp); +} +} // namespace internal +} // namespace simdjson +/* end file src/to_chars.cpp */ +/* begin file src/from_chars.cpp */ +#include +namespace simdjson { +namespace internal { + +/** + * The code in the internal::from_chars function is meant to handle the floating-point number parsing + * when we have more than 19 digits in the decimal mantissa. This should only be seen + * in adversarial scenarios: we do not expect production systems to even produce + * such floating-point numbers. 
/**
 * The code in the internal::from_chars function is meant to handle the floating-point number parsing
 * when we have more than 19 digits in the decimal mantissa. This should only be seen
 * in adversarial scenarios: we do not expect production systems to even produce
 * such floating-point numbers.
 *
 * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/)
 * who credits Ken Thompson for the design (via a reference to the Go source
 * code). See
 * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c
 * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c
 * It is probably not very fast but it is a fallback that should almost never be
 * called in real life. Google Wuffs is published under APL 2.0.
 **/

namespace {
// Capacity of decimal::digits; digits beyond this are counted but not stored.
constexpr uint32_t max_digits = 768;
// Bound on |decimal::decimal_point| used by the shift helpers.
constexpr int32_t decimal_point_range = 2047;
} // namespace

// Mantissa bits plus biased binary exponent of a parsed number.
struct adjusted_mantissa {
  uint64_t mantissa;
  int power2;
  adjusted_mantissa() : mantissa(0), power2(0) {}
};

// Arbitrary-precision decimal: one decimal digit (0-9) per entry of `digits`.
struct decimal {
  uint32_t num_digits;   // number of digits in use
  int32_t decimal_point; // position of the decimal point relative to digits[0]
  bool negative;         // set when the input started with '-'
  bool truncated;        // set when digits were dropped
  uint8_t digits[max_digits];
};

// Per-type IEEE-754 layout constants; only the `double` specialisation is
// defined in this section.
template <typename T> struct binary_format {
  static constexpr int mantissa_explicit_bits();
  static constexpr int minimum_exponent();
  static constexpr int infinite_power();
  static constexpr int sign_index();
};

template <> constexpr int binary_format<double>::mantissa_explicit_bits() {
  return 52;
}

template <> constexpr int binary_format<double>::minimum_exponent() {
  return -1023;
}
template <> constexpr int binary_format<double>::infinite_power() {
  return 0x7FF;
}

template <> constexpr int binary_format<double>::sign_index() { return 63; }

// True for the ASCII decimal digits '0'..'9'.
bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); }

// This should always succeed since it follows a call to parse_number.
+decimal parse_decimal(const char *&p) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while (*p == '0') { + ++p; + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if (*p == '.') { + ++p; + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while (is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? -exp_number : exp_number); + } + return answer; +} + +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. 
+decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); + } + return answer; +} + +namespace { + +// remove all final zeroes +inline void trim(decimal &h) { + while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { + h.num_digits--; + } +} + +uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { + shift &= 63; + const static uint16_t number_of_digits_decimal_left_shift_table[65] = { + 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, + 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, + 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, + 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, + 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, + 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, + 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, + 0x051C, 0x051C, + }; + uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; + uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; + uint32_t num_new_digits = x_a >> 11; + uint32_t pow5_a = 0x7FF & x_a; + uint32_t pow5_b = 0x7FF & x_b; + const static uint8_t + number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { + 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, + 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, + 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, + 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, + 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, + 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, + 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, + 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, + 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, + 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, + 8, 7, 6, 9, 5, 3, 1, 2, 
5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, + 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, + 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, + 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, + 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, + 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, + 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, + 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, + 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, + 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, + 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, + 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, + 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, + 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, + 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, + 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, + 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, + 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, + 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, + 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, + 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, + 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, + 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, + 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, + 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, + 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, + 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, + 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, + 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 
4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, + 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, + 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, + 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, + 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, + 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, + 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, + 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, + 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, + 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, + 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, + 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, + 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, + 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, + 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, + 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, + 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, + 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, + 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, + }; + const uint8_t *pow5 = + &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; + uint32_t i = 0; + uint32_t n = pow5_b - pow5_a; + for (; i < n; i++) { + if (i >= h.num_digits) { + return num_new_digits - 1; + } else if (h.digits[i] == pow5[i]) { + continue; + } else if (h.digits[i] < pow5[i]) { + return num_new_digits - 1; + } else { + return num_new_digits; + } + } + return num_new_digits; +} + +} // end of anonymous namespace + +uint64_t round(decimal &h) { + if ((h.num_digits == 0) || (h.decimal_point < 0)) { + return 0; + } else if (h.decimal_point > 18) { + return UINT64_MAX; + } + // at this point, we know that h.decimal_point >= 0 + uint32_t dp = 
uint32_t(h.decimal_point); + uint64_t n = 0; + for (uint32_t i = 0; i < dp; i++) { + n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); + } + bool round_up = false; + if (dp < h.num_digits) { + round_up = h.digits[dp] >= 5; // normally, we round up + // but we may need to round to even! + if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { + round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); + } + } + if (round_up) { + n++; + } + return n; +} + +// computes h * 2^-shift +void decimal_left_shift(decimal &h, uint32_t shift) { + if (h.num_digits == 0) { + return; + } + uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); + int32_t read_index = int32_t(h.num_digits - 1); + uint32_t write_index = h.num_digits - 1 + num_new_digits; + uint64_t n = 0; + + while (read_index >= 0) { + n += uint64_t(h.digits[read_index]) << shift; + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + read_index--; + } + while (n > 0) { + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + } + h.num_digits += num_new_digits; + if (h.num_digits > max_digits) { + h.num_digits = max_digits; + } + h.decimal_point += int32_t(num_new_digits); + trim(h); +} + +// computes h * 2^shift +void decimal_right_shift(decimal &h, uint32_t shift) { + uint32_t read_index = 0; + uint32_t write_index = 0; + + uint64_t n = 0; + + while ((n >> shift) == 0) { + if (read_index < h.num_digits) { + n = (10 * n) + h.digits[read_index++]; + } else if (n == 0) { + return; + } else { + while ((n >> shift) == 0) { + n = 10 * n; + read_index++; + } + break; + } + } + h.decimal_point -= int32_t(read_index - 1); 
+ if (h.decimal_point < -decimal_point_range) { // it is zero + h.num_digits = 0; + h.decimal_point = 0; + h.negative = false; + h.truncated = false; + return; + } + uint64_t mask = (uint64_t(1) << shift) - 1; + while (read_index < h.num_digits) { + uint8_t new_digit = uint8_t(n >> shift); + n = (10 * (n & mask)) + h.digits[read_index++]; + h.digits[write_index++] = new_digit; + } + while (n > 0) { + uint8_t new_digit = uint8_t(n >> shift); + n = 10 * (n & mask); + if (write_index < max_digits) { + h.digits[write_index++] = new_digit; + } else if (new_digit > 0) { + h.truncated = true; + } + } + h.num_digits = write_index; + trim(h); +} + +template adjusted_mantissa compute_float(decimal &d) { + adjusted_mantissa answer; + if (d.num_digits == 0) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + // At this point, going further, we can assume that d.num_digits > 0. + // We want to guard against excessive decimal point values because + // they can result in long running times. Indeed, we do + // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 + // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not + // fine (runs for a long time). + // + if(d.decimal_point < -324) { + // We have something smaller than 1e-324 which is always zero + // in binary64 and binary32. + // It should be zero. + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } else if(d.decimal_point >= 310) { + // We have something at least as large as 0.1e310 which is + // always infinite. + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + static const uint32_t max_shift = 60; + static const uint32_t num_powers = 19; + static const uint8_t powers[19] = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // + 33, 36, 39, 43, 46, 49, 53, 56, 59, // + }; + int32_t exp2 = 0; + while (d.decimal_point > 0) { + uint32_t n = uint32_t(d.decimal_point); + uint32_t shift = (n < num_powers) ? 
powers[n] : max_shift; + decimal_right_shift(d, shift); + if (d.decimal_point < -decimal_point_range) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + exp2 += int32_t(shift); + } + // We shift left toward [1/2 ... 1]. + while (d.decimal_point <= 0) { + uint32_t shift; + if (d.decimal_point == 0) { + if (d.digits[0] >= 5) { + break; + } + shift = (d.digits[0] < 2) ? 2 : 1; + } else { + uint32_t n = uint32_t(-d.decimal_point); + shift = (n < num_powers) ? powers[n] : max_shift; + } + decimal_left_shift(d, shift); + if (d.decimal_point > decimal_point_range) { + // we want to get infinity: + answer.power2 = 0xFF; + answer.mantissa = 0; + return answer; + } + exp2 -= int32_t(shift); + } + // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. + exp2--; + constexpr int32_t minimum_exponent = binary::minimum_exponent(); + while ((minimum_exponent + 1) > exp2) { + uint32_t n = uint32_t((minimum_exponent + 1) - exp2); + if (n > max_shift) { + n = max_shift; + } + decimal_right_shift(d, n); + exp2 += int32_t(n); + } + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; + decimal_left_shift(d, mantissa_size_in_bits); + + uint64_t mantissa = round(d); + // It is possible that we have an overflow, in which case we need + // to shift back. 
+ if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { + decimal_right_shift(d, 1); + exp2 += 1; + mantissa = round(d); + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + } + answer.power2 = exp2 - binary::minimum_exponent(); + if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { + answer.power2--; + } + answer.mantissa = + mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); + return answer; +} + +template +adjusted_mantissa parse_long_mantissa(const char *first) { + decimal d = parse_decimal(first); + return compute_float(d); +} + +template +adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { + decimal d = parse_decimal(first, end); + return compute_float(d); +} + +double from_chars(const char *first) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + + +double from_chars(const char *first, const char *end) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first, end); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? 
word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + +} // internal +} // simdjson +/* end file src/from_chars.cpp */ +/* begin file src/internal/error_tables.cpp */ + +namespace simdjson { +namespace internal { + + SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[] { + { SUCCESS, "No error" }, + { CAPACITY, "This parser can't support a document that big" }, + { MEMALLOC, "Error allocating memory, we're most likely out of memory" }, + { TAPE_ERROR, "The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, + { DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" }, + { STRING_ERROR, "Problem while parsing a string" }, + { T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" }, + { F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" }, + { N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" }, + { NUMBER_ERROR, "Problem while parsing a number" }, + { UTF8_ERROR, "The input is not valid UTF-8" }, + { UNINITIALIZED, "Uninitialized" }, + { EMPTY, "Empty: no JSON found" }, + { UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" }, + { UNCLOSED_STRING, "A string is opened, but never closed." }, + { UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." }, + { INCORRECT_TYPE, "The JSON element does not have the requested type." }, + { NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." }, + { INDEX_OUT_OF_BOUNDS, "Attempted to access an element of a JSON array that is beyond its length." }, + { NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." }, + { IO_ERROR, "Error reading the file." 
}, + { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, + { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, + { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, + { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }, + { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }, + { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, + { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, + { OUT_OF_BOUNDS, "Attempted to access location outside of document."} + }; // error_messages[] + +} // namespace internal +} // namespace simdjson +/* end file src/internal/error_tables.cpp */ +/* begin file src/internal/jsoncharutils_tables.cpp */ + +namespace simdjson { +namespace internal { + +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xa, + 0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe, + 0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x10, 0x20, 0x30, 0x40, 0x50, + 0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0, + 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, + 0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x100, 0x200, 0x300, 0x400, 0x500, + 0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00, + 0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00, + 0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, + 0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000, + 0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000, + 0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + +} // namespace internal +} // namespace simdjson +/* end file src/internal/jsoncharutils_tables.cpp */ +/* begin file src/internal/numberparsing_tables.cpp */ + +namespace simdjson { +namespace internal { + +// Precomputed powers of ten from 10^0 to 10^22. 
These +// can be represented exactly using the double type. +SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 
0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 
0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 
0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 
0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 
0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 
0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 
0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 
0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 
0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 
0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 
0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 
0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 
0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; + +} // namespace internal +} // namespace simdjson +/* end file src/internal/numberparsing_tables.cpp */ +/* begin file src/internal/simdprune_tables.cpp */ +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable +SIMDJSON_DLLIMPORTEXPORT 
const unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, +}; + +// 256 * 8 bytes = 2kB, easily fits in cache. 
+SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 
0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 
0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; //static uint64_t thintable_epi8[256] + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 +/* end file src/internal/simdprune_tables.cpp */ +/* begin file src/implementation.cpp */ +#include + +namespace simdjson { + +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = 
internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets); +} + +namespace internal { + +// Static array of known implementations. We're hoping these get baked into the executable +// without requiring a static initializer. + +#if SIMDJSON_IMPLEMENTATION_ICELAKE +static const icelake::implementation* get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL +static const haswell::implementation* get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +static const westmere::implementation* get_westmere_singleton() { + static const westmere::implementation westmere_singleton{}; + return &westmere_singleton; +} +#endif // SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_IMPLEMENTATION_ARM64 +static const arm64::implementation* get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +#endif // SIMDJSON_IMPLEMENTATION_ARM64 +#if SIMDJSON_IMPLEMENTATION_PPC64 +static const ppc64::implementation* get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +#endif // SIMDJSON_IMPLEMENTATION_PPC64 +#if SIMDJSON_IMPLEMENTATION_FALLBACK +static const fallback::implementation* get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +#endif // SIMDJSON_IMPLEMENTATION_FALLBACK + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final : public implementation { +public: + const std::string &name() const noexcept final { return set_best()->name(); } + const std::string &description() const noexcept final { return 
set_best()->description(); } + uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final { + return set_best()->create_dom_parser_implementation(capacity, max_length, dst); + } + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { + return set_best()->minify(buf, len, dst, dst_len); + } + simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } + simdjson_really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} +private: + const implementation *set_best() const noexcept; +}; + +static const std::initializer_list& get_available_implementation_pointers() { + static const std::initializer_list available_implementation_pointers { +#if SIMDJSON_IMPLEMENTATION_ICELAKE + get_icelake_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL + get_haswell_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE + get_westmere_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 + get_arm64_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 + get_ppc64_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK + get_fallback_singleton(), +#endif + }; // available_implementation_pointers + return available_implementation_pointers; +} + +// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support +class unsupported_implementation final : public implementation { +public: + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t, + size_t, + std::unique_ptr& + ) const noexcept final { + return UNSUPPORTED_ARCHITECTURE; + } + 
simdjson_warn_unused error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override { + return UNSUPPORTED_ARCHITECTURE; + } + simdjson_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override { + return false; // Just refuse to validate. Given that we have a fallback implementation + // it seems unlikely that unsupported_implementation will ever be used. If it is used, + // then it will flag all strings as invalid. The alternative is to return an error_code + // from which the user has to figure out whether the string is valid UTF-8... which seems + // like a lot of work just to handle the very unlikely case that we have an unsupported + // implementation. And, when it does happen (that we have an unsupported implementation), + // what are the chances that the programmer has a fallback? Given that *we* provide the + // fallback, it implies that the programmer would need a fallback for our fallback. + } + unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} +}; + +const unsupported_implementation* get_unsupported_singleton() { + static const unsupported_implementation unsupported_singleton{}; + return &unsupported_singleton; +} + +size_t available_implementation_list::size() const noexcept { + return internal::get_available_implementation_pointers().size(); +} +const implementation * const *available_implementation_list::begin() const noexcept { + return internal::get_available_implementation_pointers().begin(); +} +const implementation * const *available_implementation_list::end() const noexcept { + return internal::get_available_implementation_pointers().end(); +} +const implementation *available_implementation_list::detect_best_supported() const noexcept { + // They are prelisted in priority order, so we just go down the list + uint32_t supported_instruction_sets = internal::detect_supported_architectures(); + for (const implementation *impl 
: internal::get_available_implementation_pointers()) { + uint32_t required_instruction_sets = impl->required_instruction_sets(); + if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; } + } + return get_unsupported_singleton(); // this should never happen? +} + +const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept { + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *force_implementation_name = getenv("SIMDJSON_FORCE_IMPLEMENTATION"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (force_implementation_name) { + auto force_implementation = get_available_implementations()[force_implementation_name]; + if (force_implementation) { + return get_active_implementation() = force_implementation; + } else { + // Note: abort() and stderr usage within the library is forbidden. + return get_active_implementation() = get_unsupported_singleton(); + } + } + return get_active_implementation() = get_available_implementations().detect_best_supported(); +} + +} // namespace internal + +SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() { + static const internal::available_implementation_list available_implementations{}; + return available_implementations; +} + +SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() { + static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +} + +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { + return get_active_implementation()->minify(reinterpret_cast(buf), len, reinterpret_cast(dst), dst_len); +} 
+simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { + return get_active_implementation()->validate_utf8(buf, len); +} + +const implementation * builtin_implementation() { + static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; + assert(builtin_impl); + return builtin_impl; +} + + +} // namespace simdjson +/* end file src/implementation.cpp */ + +#if SIMDJSON_IMPLEMENTATION_ARM64 +/* begin file src/arm64/implementation.cpp */ +/* begin file include/simdjson/arm64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "arm64" +// #define SIMDJSON_IMPLEMENTATION arm64 +/* end file include/simdjson/arm64/begin.h */ + +namespace simdjson { +namespace arm64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/arm64/end.h */ +/* end file include/simdjson/arm64/end.h */ +/* end file src/arm64/implementation.cpp */ +/* begin file src/arm64/dom_parser_implementation.cpp */ +/* begin file include/simdjson/arm64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "arm64" +// #define SIMDJSON_IMPLEMENTATION arm64 +/* end file include/simdjson/arm64/begin.h */ + +// +// Stage 1 +// +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_really_inline uint64_t op() const noexcept { return _op; } + 
simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Functional programming causes trouble with Visual Studio. + // Keeping this version in comments since it is much nicer: + // auto v = in.map([&](simd8 chunk) { + // auto nib_lo = chunk & 0xf; + // auto nib_hi = chunk.shr<4>(); + // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // return shuf_lo & shuf_hi; + // }); + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). *However* if we only need spaces, + // it is likely that we will still compute 'v' above with two lookup_16: one + // could do it a bit cheaper. This is in contrast with the x64 implementations + // where we can, efficiently, do the white space and structural matching + // separately. 
One reason for this difference is that on ARM NEON, the table + // lookups either zero or leave unchanged the characters exceeding 0xF whereas + // on x64, the equivalent instruction (pshufb) automatically applies a mask, + // ignoring the 4 most significant bits. Thus the x64 implementation is + // optimized differently. This being said, if you use this code strictly + // just for minification (or just to identify the structural characters), + // there is a small untaken optimization opportunity here. We deliberately + // do not pick it up. + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_really_inline bool is_ascii(const simd8x64& input) { + simd8 bits = input.reduce_or(); + return bits.max_val() < 0b10000000u; +} + +simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. + // This will work fine because we only have to report errors for cases with 0-1 lead bytes. + // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is + // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. + // The error will be detected there. 
+ return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); + simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); + return is_third_byte ^ is_fourth_byte; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ 
________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + 
return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_really_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. 
+ static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + if(simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } if(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_really_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace arm64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_really_inline size_t block_index(); + simdjson_really_inline bool has_full_block() const; + simdjson_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_really_inline size_t get_remainder(uint8_t *dst) const; + simdjson_really_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string 
(everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_really_inline uint64_t find_escaped(uint64_t escape); + simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() {} + simdjson_really_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_really_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_really_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, 
block_2); + reader.advance(); +} + +template<> +simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace arm64 { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_really_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
+// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// + +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace arm64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no + // benefit and therefore makes things worse. + // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); 
+} + +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/arm64/end.h */ +/* end file include/simdjson/arm64/end.h */ +/* end file src/arm64/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* begin file src/fallback/implementation.cpp */ +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ +/* end file src/fallback/implementation.cpp */ +/* begin file src/fallback/dom_parser_implementation.cpp */ +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +// +// Stage 1 +// +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace fallback { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. 
+ * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage1 { + +class structural_scanner { +public: + +simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) + : buf{_parser.buf}, + next_structural_index{_parser.structural_indexes.get()}, + parser{_parser}, + len{static_cast(_parser.len)}, + partial{_partial} { +} + +simdjson_really_inline void add_structural() { + *next_structural_index = idx; + next_structural_index++; +} + +simdjson_really_inline bool is_continuation(uint8_t c) { + return (c & 0b11000000) == 0b10000000; +} + +simdjson_really_inline void validate_utf8_character() { + // Continuation + if (simdjson_unlikely((buf[idx] & 0b01000000) == 0)) { + // extra continuation + error = UTF8_ERROR; + idx++; + return; + } + + // 2-byte + if ((buf[idx] & 0b00100000) == 0) { + // missing continuation + if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 1100000_ 10______ + if (buf[idx] <= 0b11000001) { error = UTF8_ERROR; } + idx += 2; + return; + } + + // 3-byte + if ((buf[idx] & 0b00010000) == 0) { + // missing continuation + if (simdjson_unlikely(idx+2 > len 
|| !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11100000 100_____ ________ + if (buf[idx] == 0b11100000 && buf[idx+1] <= 0b10011111) { error = UTF8_ERROR; } + // surrogates: U+D800-U+DFFF 11101101 101_____ + if (buf[idx] == 0b11101101 && buf[idx+1] >= 0b10100000) { error = UTF8_ERROR; } + idx += 3; + return; + } + + // 4-byte + // missing continuation + if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11110000 1000____ ________ ________ + if (buf[idx] == 0b11110000 && buf[idx+1] <= 0b10001111) { error = UTF8_ERROR; } + // too large: > U+10FFFF: + // 11110100 (1001|101_)____ + // 1111(1___|011_|0101) 10______ + // also includes 5, 6, 7 and 8 byte characters: + // 11111___ + if (buf[idx] == 0b11110100 && buf[idx+1] >= 0b10010000) { error = UTF8_ERROR; } + if (buf[idx] >= 0b11110101) { error = UTF8_ERROR; } + idx += 4; +} + +// Returns true if the string is unclosed. +simdjson_really_inline bool validate_string() { + idx++; // skip first quote + while (idx < len && buf[idx] != '"') { + if (buf[idx] == '\\') { + idx += 2; + } else if (simdjson_unlikely(buf[idx] & 0b10000000)) { + validate_utf8_character(); + } else { + if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } + if (idx >= len) { return true; } + return false; +} + +simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) { + switch (c) { + case '{': case '}': case '[': case ']': case ',': case ':': + case ' ': case '\r': case '\n': case '\t': + return true; + default: + return false; + } +} + +// +// Parse the entire input in STEP_SIZE-byte chunks. 
+// +simdjson_really_inline error_code scan() { + bool unclosed_string = false; + for (;idx 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. 
+ parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } + } else if(unclosed_string) { error = UNCLOSED_STRING; } + return error; +} + +private: + const uint8_t *buf; + uint32_t *next_structural_index; + dom_parser_implementation &parser; + uint32_t len; + uint32_t idx{0}; + error_code error{SUCCESS}; + stage1_mode partial; +}; // structural_scanner + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { + this->buf = _buf; + this->len = _len; + stage1::structural_scanner scanner(*this, partial); + return scanner.scan(); +} + +// big table for the minifier +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < len) { + unsigned char c = buf[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + dst[pos] = c; + pos += meta[2] | quote; + + i += 1; + nonescape = uint8_t(~nonescape) | (meta[1]); + } + dst_len = pos; // we intentionally do not work with a reference + // for fear of aliasing + return quote ? 
UNCLOSED_STRING : SUCCESS; +} + +// credit: based on code from Google Fuchsia (Apache Licensed) +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast<const uint8_t *>(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check if the next 16 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 16 more bytes, check that they are ascii + uint64_t v1; + memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + if (byte < 0b10000000) { + pos++; + continue; + } else if ((byte & 0b11100000) == 0b11000000) { + next_pos = pos + 2; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } + // range check + code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + if (code_point < 0x80 || 0x7ff < code_point) { return false; } + } else if ((byte & 0b11110000) == 0b11100000) { + next_pos = pos + 3; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } + // range check + code_point = (byte & 0b00001111) << 12 | + (data[pos + 1] & 0b00111111) << 6 | + (data[pos + 2] & 0b00111111); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; } + // range check + code_point = +
(byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | + (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} + +} // namespace fallback +} // namespace simdjson + +// +// Stage 2 +// +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace fallback { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ +/* end file src/fallback/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* begin file src/icelake/implementation.cpp */ +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace icelake +} // namespace simdjson + +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* end file 
include/simdjson/icelake/end.h */ + +/* end file src/icelake/implementation.cpp */ +/* begin file src/icelake/dom_parser_implementation.cpp */ +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + // ASCII white-space ('\r','\n','\t',' ') + simdjson_really_inline uint64_t whitespace() const noexcept; + // non-quote structural characters (comma, colon, braces, brackets) + simdjson_really_inline uint64_t op() const noexcept; + // neither a structural character nor a white-space, so letters, numbers and quotes + simdjson_really_inline uint64_t scalar() const noexcept; + + uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') + uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) +}; + +simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } +simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. 
+ const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). 
+ + const uint64_t whitespace = in.eq({ + _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm512_shuffle_epi8(op_table, in.chunks[0]) + }); + + return { whitespace, op }; +} + +simdjson_really_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_really_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + if(simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } if(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_really_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +// defining SIMDJSON_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write +#define SIMDJSON_CUSTOM_BIT_INDEXER +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace icelake { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_really_inline size_t block_index(); + simdjson_really_inline bool has_full_block() const; + simdjson_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdjson_really_inline size_t get_remainder(uint8_t *dst) const; + simdjson_really_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of 
string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_really_inline uint64_t find_escaped(uint64_t escape); + simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. 
+ * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() {} + simdjson_really_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_really_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_really_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, 
block_2); + reader.advance(); +} + +template<> +simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace icelake { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_really_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
+// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +// We must not forget to undefine it now: +#undef SIMDJSON_CUSTOM_BIT_INDEXER + +/** + * We provide a custom version of bit_indexer::write using + * naked intrinsics. + * TODO: make this code more elegant. 
+ */ +// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. +// as a workaround, we disable warnings within the following function. +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +namespace simdjson { namespace icelake { namespace { namespace stage1 { +simdjson_really_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) { return; } + + const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( + 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, + 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, + 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, + 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 + )); + const __m512i start_index = _mm512_set1_epi32(idx); + + const auto count = count_ones(bits); + __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); + _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); + + if(count > 16) { + const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); + _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); + if(count > 32) { + const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); + _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); + if(count > 48) { + const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); + _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); + } + } + } + this->tail += count; +} +}}}} +SIMDJSON_POP_DISABLE_WARNINGS + +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace icelake { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return icelake::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace icelake +} // namespace simdjson + +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* 
end file include/simdjson/icelake/end.h */ +/* end file src/icelake/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* begin file src/haswell/implementation.cpp */ +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +namespace simdjson { +namespace haswell { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson + +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ + +/* end file src/haswell/implementation.cpp */ +/* begin file src/haswell/dom_parser_implementation.cpp */ +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + // ASCII white-space ('\r','\n','\t',' ') + simdjson_really_inline uint64_t whitespace() const noexcept; + // non-quote structural characters (comma, colon, braces, brackets) + simdjson_really_inline uint64_t op() const noexcept; + // neither a structural character nor a white-space, so letters, numbers and quotes + simdjson_really_inline uint64_t scalar() const noexcept; + + uint64_t 
_whitespace; // ASCII white-space ('\r','\n','\t',' ') + uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) +}; + +simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } +simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. 
If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + const uint64_t whitespace = in.eq({ + _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm256_shuffle_epi8(op_table, in.chunks[0]), + _mm256_shuffle_epi8(op_table, in.chunks[1]) + }); + + return { whitespace, op }; +} + +simdjson_really_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_really_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + if(simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } if(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_really_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace haswell { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_really_inline size_t block_index(); + simdjson_really_inline bool has_full_block() const; + simdjson_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_really_inline size_t get_remainder(uint8_t *dst) const; + simdjson_really_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of 
string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_really_inline uint64_t find_escaped(uint64_t escape); + simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since
+// the start bit causes a carry), and leaves even-bit sequences alone.
+//
+// Example:
+//
+// text | \\\ | \\\"\\\" \\\" \\"\\" |
+// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape
+// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape
+// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later
+// invert_mask | | cxxx c xx c| even_seq << 1
+// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit
+// escaped | x | x x x x x x x x |
+// desired | x | x x x x x x x x |
+// text | \\\ | \\\"\\\" \\\" \\"\\" |
+//
+simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) {
+  // Bits 0, 2, 4, ... of the 64-bit block.
+  constexpr uint64_t kEvenBits = 0x5555555555555555ULL;
+
+  // prev_escaped carries in from the previous block: when set, bit 0 of this
+  // block is already escaped, so it must not be treated as a backslash itself.
+  const uint64_t carried_in = prev_escaped;
+  backslash &= ~carried_in;
+
+  // Every position that directly follows a backslash (or the carried-in one).
+  const uint64_t after_backslash = (backslash << 1) | carried_in;
+
+  // Starts of backslash runs that begin on an odd bit. Adding them to the
+  // backslash mask ripples a carry through each such run; the overflow out of
+  // bit 63 becomes the carry for the next block.
+  const uint64_t odd_run_starts = backslash & ~kEvenBits & ~after_backslash;
+  uint64_t even_run_sum;
+  prev_escaped = add_overflow(odd_run_starts, backslash, &even_run_sum);
+
+  // We return the *escaped* bits (one past each escape), hence the shift.
+  const uint64_t flip_mask = even_run_sum << 1;
+
+  // Take every other character after a backslash as escaped, flipping the
+  // parity wherever flip_mask says the alternation must be corrected.
+  return (kEvenBits ^ flip_mask) & after_backslash;
+}
+
+//
+// Return a mask of all string characters plus end quotes.
+//
+// prev_escaped is overflow saying whether the next character is escaped.
+// prev_in_string is overflow saying whether we're still in a string.
+//
+// Backslash sequences outside of quotes will be detected in stage 2.
+//
+simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64<uint8_t>& in) {
+  const uint64_t backslash = in.eq('\\');
+  const uint64_t escaped = find_escaped(backslash);
+  // Only quotes that are not themselves escaped delimit strings.
+  const uint64_t quote = in.eq('"') & ~escaped;
+
+  //
+  // prefix_xor flips on bits inside the string (and flips off the end quote).
+  //
+  // Then we xor with prev_in_string: if we were in a string already, its effect is flipped
+  // (characters inside strings are outside, and characters outside strings are inside).
+  //
+  const uint64_t in_string = prefix_xor(quote) ^ prev_in_string;
+
+  //
+  // Check if we're still in a string at the end of the block so the next block will know.
+  //
+  // The arithmetic right shift smears the top bit across the whole word, giving
+  // all 1's (still inside a string) or all 0's (outside). Right shift of a signed
+  // value is expected to be well-defined and standard compliant as of C++20;
+  // John Regehr from Utah U. says this is fine code.
+  //
+  prev_in_string = uint64_t(static_cast<int64_t>(in_string) >> 63);
+
+  // We are returning a function-local object so either we get a move constructor
+  // or we get copy elision.
+  return json_string_block(
+    backslash,
+    escaped,
+    quote,
+    in_string
+  );
+}
+
+// Reports UNCLOSED_STRING when the last scanned block ended inside a string,
+// SUCCESS otherwise.
+simdjson_really_inline error_code json_string_scanner::finish() {
+  if (prev_in_string) {
+    return UNCLOSED_STRING;
+  }
+  return SUCCESS;
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file src/generic/stage1/json_string_scanner.h */
+/* begin file src/generic/stage1/json_scanner.h */
+namespace simdjson {
+namespace haswell {
+namespace {
+namespace stage1 {
+
+/**
+ * A block of scanned json, with information on operators and scalars.
+ *
+ * We seek to identify pseudo-structural characters. Anything that is inside
+ * a string must be omitted (hence & ~_string.string_tail()).
+ * Otherwise, pseudo-structural characters come in two forms.
+ * 1. We have the structural characters ([,],{,},:, comma). The
+ * term 'structural character' is from the JSON RFC.
+ * 2. We have the 'scalar pseudo-structural characters'.
+ * Scalars are quotes, and any character except structural characters and white space.
+ *
+ * To identify the scalar pseudo-structural characters, we must look at what comes
+ * before them: it must be a space, a quote or a structural character.
+ * Starting with simdjson v0.3, we identify them by
+ * negation: we identify everything that is followed by a non-quote scalar,
+ * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'.
+ */
+struct json_block {
+public:
+  // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017
+  simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
+  _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
+  simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) :
+  _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {}
+
+  /**
+   * The start of structurals.
+   * In simdjson prior to v0.3, these were called the pseudo-structural characters.
+   **/
+  simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); }
+  /** All JSON whitespace (i.e. not in a string) */
+  simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); }
+
+  // Helpers
+
+  /** Whether the given characters are inside a string (only works on non-quotes) */
+  simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); }
+  /** Whether the given characters are outside a string (only works on non-quotes) */
+  simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); }
+
+  // string and escape characters
+  json_string_block _string;
+  // whitespace, structural characters ('operators'), scalars
+  json_character_block _characters;
+  // whether the previous character was a scalar
+  uint64_t _follows_potential_nonquote_scalar;
+private:
+  // Potential structurals (i.e. disregarding strings)
+
+  /**
+   * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc".
+   * They may reside inside a string.
+   **/
+  simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); }
+  /**
+   * The start of non-operator runs, like 123, true and "abc".
+   * It may reside inside a string.
+   **/
+  simdjson_really_inline uint64_t potential_scalar_start() const noexcept {
+    // The term "scalar" refers to anything except structural characters and white space
+    // (so letters, numbers, quotes).
+    // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space
+    // then we know that it is irrelevant structurally.
+    return _characters.scalar() & ~follows_potential_scalar();
+  }
+  /**
+   * Whether the given character is immediately after a non-operator like 123, true.
+   * The characters following a quote are not included.
+   */
+  simdjson_really_inline uint64_t follows_potential_scalar() const noexcept {
+    // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character
+    // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a
+    // white space.
+    // It is understood that within quoted region, anything at all could be marked (irrelevant).
+    return _follows_potential_nonquote_scalar;
+  }
+};
+
+/**
+ * Scans JSON for important bits: structural characters or 'operators', strings, and scalars.
+ *
+ * The scanner starts by calculating two distinct things:
+ * - string characters (taking \" into account)
+ * - structural characters or 'operators' ([]{},:, comma)
+ * and scalars (runs of non-operators like 123, true and "abc")
+ *
+ * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel:
+ * in particular, the operator/scalar bit will find plenty of things that are actually part of
+ * strings. When we're done, json_block will fuse the two together by masking out tokens that are
+ * part of a string.
+ */
+class json_scanner {
+public:
+  json_scanner() {}
+  simdjson_really_inline json_block next(const simd::simd8x64<uint8_t>& in);
+  // Returns either UNCLOSED_STRING or SUCCESS
+  simdjson_really_inline error_code finish();
+
+private:
+  // Whether the last character of the previous iteration is part of a scalar token
+  // (anything except whitespace or a structural character/'operator').
+  uint64_t prev_scalar = 0ULL;
+  json_string_scanner string_scanner{};
+};
+
+
+//
+// Check if the current character immediately follows a matching character.
+//
+// For example, this checks for quotes with backslashes in front of them:
+//
+// const uint64_t backslashed_quote = in.eq('"') & follows(in.eq('\\'), prev_backslash);
+//
+// `overflow` is in/out: on entry it holds the carry from the previous block (bit 0),
+// on exit it holds the top bit of `match`, i.e. the carry for the next block.
+//
+simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) {
+  const uint64_t result = match << 1 | overflow;
+  overflow = match >> 63;
+  return result;
+}
+
+simdjson_really_inline json_block json_scanner::next(const simd::simd8x64<uint8_t>& in) {
+  json_string_block strings = string_scanner.next(in);
+  // identifies the white-space and the structural characters
+  json_character_block characters = json_character_block::classify(in);
+  // The term "scalar" refers to anything except structural characters and white space
+  // (so letters, numbers, quotes).
+  // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers).
+  //
+  // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon)
+  // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential
+  // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we
+  // may need to add an extra check when parsing strings.
+  //
+  // Performance: there are many ways to skin this cat.
+  const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote();
+  uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar);
+  // We are returning a function-local object so either we get a move constructor
+  // or we get copy elision.
+  return json_block(
+    strings,// strings is a function-local object so either it moves or the copy is elided.
+    characters,
+    follows_nonquote_scalar
+  );
+}
+
+simdjson_really_inline error_code json_scanner::finish() {
+  return string_scanner.finish();
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file src/generic/stage1/json_scanner.h */
+/* begin file src/generic/stage1/json_minifier.h */
+// This file contains the common code every implementation uses in stage1
+// It is intended to be included multiple times and compiled multiple times
+// We assume the file in which it is included already includes
+// "simdjson/stage1.h" (this simplifies amalgamation)
+
+namespace simdjson {
+namespace haswell {
+namespace {
+namespace stage1 {
+
+// Copies JSON from `buf` to `dst`, dropping whitespace that lies outside strings.
+class json_minifier {
+public:
+  template<size_t STEP_SIZE>
+  static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept;
+
+private:
+  simdjson_really_inline json_minifier(uint8_t *_dst)
+  : dst{_dst}
+  {}
+  template<size_t STEP_SIZE>
+  simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader<STEP_SIZE> &reader) noexcept;
+  simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block);
+  simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len);
+  json_scanner scanner{};
+  // Write cursor into the output buffer.
+  uint8_t *dst;
+};
+
+simdjson_really_inline void json_minifier::next(const simd::simd8x64<uint8_t>& in, const json_block& block) {
+  // Bytes flagged in the mask (whitespace outside strings) are dropped by compress().
+  uint64_t mask = block.whitespace();
+  dst += in.compress(mask, dst);
+}
+
+simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) {
+  error_code error = scanner.finish();
+  if (error) { dst_len = 0; return error; }
+  dst_len = dst - dst_start;
+  return SUCCESS;
+}
+
+template<>
+simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept {
+  simd::simd8x64<uint8_t> in_1(block_buf);
+  simd::simd8x64<uint8_t> in_2(block_buf+64);
+  json_block block_1 = scanner.next(in_1);
+  json_block block_2 = scanner.next(in_2);
+  this->next(in_1, block_1);
+  this->next(in_2, block_2);
+  reader.advance();
+}
+
+template<>
+simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept {
+  simd::simd8x64<uint8_t> in_1(block_buf);
+  json_block block_1 = scanner.next(in_1);
+  // Reuse the block that is already loaded (as step<128> does) instead of passing
+  // the raw pointer, which would build a second simd8x64 through an implicit conversion.
+  this->next(in_1, block_1);
+  reader.advance();
+}
+
+template<size_t STEP_SIZE>
+error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept {
+  buf_block_reader<STEP_SIZE> reader(buf, len);
+  json_minifier minifier(dst);
+
+  // Index the first n-1 blocks
+  while (reader.has_full_block()) {
+    minifier.step<STEP_SIZE>(reader.full_block(), reader);
+  }
+
+  // Index the last (remainder) block, padded with spaces
+  uint8_t block[STEP_SIZE];
+  size_t remaining_bytes = reader.get_remainder(block);
+  if (remaining_bytes > 0) {
+    // We do not want to write directly to the output stream. Rather, we write
+    // to a local buffer (for safety).
+    uint8_t out_block[STEP_SIZE];
+    uint8_t * const guarded_dst{minifier.dst};
+    minifier.dst = out_block;
+    minifier.step<STEP_SIZE>(block, reader);
+    size_t to_write = minifier.dst - out_block;
+    // In some cases, we could be enticed to consider the padded spaces
+    // as part of the string. This is fine as long as we do not write more
+    // than we consumed.
+    if(to_write > remaining_bytes) { to_write = remaining_bytes; }
+    std::memcpy(guarded_dst, out_block, to_write);
+    minifier.dst = guarded_dst + to_write;
+  }
+  return minifier.finish(dst, dst_len);
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file src/generic/stage1/json_minifier.h */
+/* begin file src/generic/stage1/find_next_document_index.h */
+namespace simdjson {
+namespace haswell {
+namespace {
+
+/**
+ * This algorithm is used to quickly identify the last structural position that
+ * makes up a complete document.
+ *
+ * It does this by going backwards and finding the last *document boundary* (a
+ * place where one value follows another without a comma between them).
If the
+ * last document (the characters after the boundary) has an equal number of
+ * start and end brackets, it is considered complete.
+ *
+ * Simply put, we iterate over the structural characters, starting from
+ * the end. We consider that we found the end of a JSON document when the
+ * first element of the pair is NOT one of these characters: '{' '[' ':' ','
+ * and when the second element is NOT one of these characters: '}' ']' ':' ','.
+ *
+ * This simple comparison works most of the time, but it does not cover cases
+ * where the batch's structural indexes contain a perfect amount of documents.
+ * In such a case, we do not have access to the structural index which follows
+ * the last document, therefore, we do not have access to the second element in
+ * the pair, and that means we cannot identify the last document. To fix this
+ * issue, we keep a count of the open and closed curly/square braces we found
+ * while searching for the pair. When we find a pair AND the count of open and
+ * closed curly/square braces is the same, we know that we just passed a
+ * complete document, therefore the last json buffer location is the end of the
+ * batch.
+ */
+simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) {
+  // Variant: do not count separately, just figure out depth
+  if(parser.n_structural_indexes == 0) { return 0; }
+  auto arr_cnt = 0;
+  auto obj_cnt = 0;
+  for (auto i = parser.n_structural_indexes - 1; i > 0; i--) {
+    auto idxb = parser.structural_indexes[i];
+    switch (parser.buf[idxb]) {
+    case ':':
+    case ',':
+      continue;
+    case '}':
+      obj_cnt--;
+      continue;
+    case ']':
+      arr_cnt--;
+      continue;
+    case '{':
+      obj_cnt++;
+      break;
+    case '[':
+      arr_cnt++;
+      break;
+    }
+    auto idxa = parser.structural_indexes[i - 1];
+    switch (parser.buf[idxa]) {
+    case '{':
+    case '[':
+    case ':':
+    case ',':
+      continue;
+    }
+    // Last document is complete, so the next document will appear after!
+    if (!arr_cnt && !obj_cnt) {
+      return parser.n_structural_indexes;
+    }
+    // Last document is incomplete; mark the document at i + 1 as the next one
+    return i;
+  }
+  // If we made it to the end, we want to finish counting to see if we have a full document.
+  switch (parser.buf[parser.structural_indexes[0]]) {
+    case '}':
+      obj_cnt--;
+      break;
+    case ']':
+      arr_cnt--;
+      break;
+    case '{':
+      obj_cnt++;
+      break;
+    case '[':
+      arr_cnt++;
+      break;
+  }
+  if (!arr_cnt && !obj_cnt) {
+    // We have a complete document.
+    return parser.n_structural_indexes;
+  }
+  return 0;
+}
+
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file src/generic/stage1/find_next_document_index.h */
+
+namespace simdjson {
+namespace haswell {
+namespace {
+namespace stage1 {
+
+class bit_indexer {
+public:
+  uint32_t *tail;
+
+  simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {}
+
+  // flatten out values in 'bits' assuming that they are to have values of idx
+  // plus their position in the bitvector, and store these indexes at
+  // base_ptr[base] incrementing base as we go
+  // will potentially store extra values beyond end of valid bits, so base_ptr
+  // needs to be large enough to handle this
+  //
+  // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own
+  // version of the code.
+#ifdef SIMDJSON_CUSTOM_BIT_INDEXER
+  simdjson_really_inline void write(uint32_t idx, uint64_t bits);
+#else
+  simdjson_really_inline void write(uint32_t idx, uint64_t bits) {
+    // In some instances, the next branch is expensive because it is mispredicted.
+    // Unfortunately, in other cases,
+    // it helps tremendously.
+    if (bits == 0)
+      return;
+#if defined(SIMDJSON_PREFER_REVERSE_BITS)
+    /**
+     * ARM lacks a fast trailing zero instruction, but it has a fast
+     * bit reversal instruction and a fast leading zero instruction.
+     * Thus it may be profitable to reverse the bits (once) and then
+     * to rely on a sequence of instructions that call the leading
+     * zero instruction.
+     *
+     * Performance notes:
+     * The chosen routine is not optimal in terms of data dependency
+     * since zero_leading_bit might require two instructions. However,
+     * it tends to minimize the total number of instructions which is
+     * beneficial.
+     */
+
+    uint64_t rev_bits = reverse_bits(bits);
+    int cnt = static_cast<int>(count_ones(bits));
+    int i = 0;
+    // Do the first 8 all together
+    for (; i<8; i++) {
+      int lz = leading_zeroes(rev_bits);
+      this->tail[i] = static_cast<uint32_t>(idx) + lz;
+      rev_bits = zero_leading_bit(rev_bits, lz);
+    }
+    // Do the next 8 all together (we hope in most cases it won't happen at all
+    // and the branch is easily predicted).
+    if (simdjson_unlikely(cnt > 8)) {
+      i = 8;
+      for (; i<16; i++) {
+        int lz = leading_zeroes(rev_bits);
+        this->tail[i] = static_cast<uint32_t>(idx) + lz;
+        rev_bits = zero_leading_bit(rev_bits, lz);
+      }
+
+
+      // Most files don't have 16+ structurals per block, so we take several basically guaranteed
+      // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
+      // or the start of a value ("abc" true 123) every four characters.
+      if (simdjson_unlikely(cnt > 16)) {
+        i = 16;
+        while (rev_bits != 0) {
+          int lz = leading_zeroes(rev_bits);
+          this->tail[i++] = static_cast<uint32_t>(idx) + lz;
+          rev_bits = zero_leading_bit(rev_bits, lz);
+        }
+      }
+    }
+    this->tail += cnt;
+#else // SIMDJSON_PREFER_REVERSE_BITS
+    /**
+     * Under recent x64 systems, we often have both a fast trailing zero
+     * instruction and a fast 'clear-lower-bit' instruction so the following
+     * algorithm can be competitive.
+     */
+
+    int cnt = static_cast<int>(count_ones(bits));
+    // Do the first 8 all together
+    for (int i=0; i<8; i++) {
+      this->tail[i] = idx + trailing_zeroes(bits);
+      bits = clear_lowest_bit(bits);
+    }
+
+    // Do the next 8 all together (we hope in most cases it won't happen at all
+    // and the branch is easily predicted).
+    if (simdjson_unlikely(cnt > 8)) {
+      for (int i=8; i<16; i++) {
+        this->tail[i] = idx + trailing_zeroes(bits);
+        bits = clear_lowest_bit(bits);
+      }
+
+      // Most files don't have 16+ structurals per block, so we take several basically guaranteed
+      // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :)
+      // or the start of a value ("abc" true 123) every four characters.
+      if (simdjson_unlikely(cnt > 16)) {
+        int i = 16;
+        do {
+          this->tail[i] = idx + trailing_zeroes(bits);
+          bits = clear_lowest_bit(bits);
+          i++;
+        } while (i < cnt);
+      }
+    }
+
+    this->tail += cnt;
+#endif
+  }
+#endif // SIMDJSON_CUSTOM_BIT_INDEXER
+
+};
+
+class json_structural_indexer {
+public:
+  /**
+   * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes.
+   *
+   * @param partial Setting the partial parameter to true allows the find_structural_bits to
+   * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If
+   * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8.
+   */
+  template<size_t STEP_SIZE>
+  static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept;
+
+private:
+  simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes);
+  template<size_t STEP_SIZE>
+  simdjson_really_inline void step(const uint8_t *block, buf_block_reader<STEP_SIZE> &reader) noexcept;
+  simdjson_really_inline void next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx);
+  simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial);
+
+  json_scanner scanner{};
+  utf8_checker checker{};
+  bit_indexer indexer;
+  uint64_t prev_structurals = 0;
+  uint64_t unescaped_chars_error = 0;
+};
+
+simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {}
+
+// Skip the last character if it is partial
+simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) {
+  // Short inputs are handled separately so that we never read before buf[0].
+  if (simdjson_unlikely(len < 3)) {
+    switch (len) {
+      case 2:
+        if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+        if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left
+        return len;
+      case 1:
+        if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+        return len;
+      case 0:
+        return len;
+    }
+  }
+  if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+  if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left
+  if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left
+  return len;
+}
+
+//
+// PERF NOTES:
+// We pipe 2 inputs through these stages:
+// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load
+// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
+// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path.
+// The output of step 1 depends entirely on this information. These functions don't quite use
+// up enough CPU: the second half of the functions is highly serial, only using 1 execution core
+// at a time. The second input's scans has some dependency on the first ones finishing it, but
+// they can make a lot of progress before they need that information.
+// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that
+// to finish: utf-8 checks and generating the output from the last iteration.
+//
+// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all
+// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough
+// workout.
+//
+template<size_t STEP_SIZE>
+error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept {
+  if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; }
+  // We guard the rest of the code so that we can assume that len > 0 throughout.
+  if (len == 0) { return EMPTY; }
+  if (is_streaming(partial)) {
+    len = trim_partial_utf8(buf, len);
+    // If you end up with an empty window after trimming
+    // the partial UTF-8 bytes, then chances are good that you
+    // have an UTF-8 formatting error.
+    if(len == 0) { return UTF8_ERROR; }
+  }
+  buf_block_reader<STEP_SIZE> reader(buf, len);
+  json_structural_indexer indexer(parser.structural_indexes.get());
+
+  // Read all but the last block
+  while (reader.has_full_block()) {
+    indexer.step<STEP_SIZE>(reader.full_block(), reader);
+  }
+  // Take care of the last block (will always be there unless file is empty which is
+  // not supposed to happen.)
+  uint8_t block[STEP_SIZE];
+  if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; }
+  indexer.step<STEP_SIZE>(block, reader);
+  return indexer.finish(parser, reader.block_index(), len, partial);
+}
+
+template<>
+simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept {
+  simd::simd8x64<uint8_t> in_1(block);
+  simd::simd8x64<uint8_t> in_2(block+64);
+  json_block block_1 = scanner.next(in_1);
+  json_block block_2 = scanner.next(in_2);
+  this->next(in_1, block_1, reader.block_index());
+  this->next(in_2, block_2, reader.block_index()+64);
+  reader.advance();
+}
+
+template<>
+simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept {
+  simd::simd8x64<uint8_t> in_1(block);
+  json_block block_1 = scanner.next(in_1);
+  this->next(in_1, block_1, reader.block_index());
+  reader.advance();
+}
+
+simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64<uint8_t>& in, const json_block& block, size_t idx) {
+  uint64_t unescaped = in.lteq(0x1F);
+  checker.check_next_input(in);
+  indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser
+  prev_structurals = block.structural_start();
+  unescaped_chars_error |= block.non_quote_inside_string(unescaped);
+}
+
+simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) {
+  // Write out the final iteration's structurals
+  indexer.write(uint32_t(idx-64), prev_structurals);
+  error_code error = scanner.finish();
+  // We deliberately break down the next expression so that it is
+  // human readable.
+  const bool should_we_exit = is_streaming(partial) ?
+    ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING
+    : (error != SUCCESS); // if partial is false, we must have SUCCESS
+  const bool have_unclosed_string = (error == UNCLOSED_STRING);
+  if (simdjson_unlikely(should_we_exit)) { return error; }
+
+  if (unescaped_chars_error) {
+    return UNESCAPED_CHARS;
+  }
+  parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get());
+  /***
+   * The On Demand API requires special padding.
+   *
+   * This is related to https://github.com/simdjson/simdjson/issues/906
+   * Basically, we want to make sure that if the parsing continues beyond the last (valid)
+   * structural character, it quickly stops.
+   * Only three structural characters can be repeated without triggering an error in JSON: [,] and }.
+   * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing
+   * continues, then it must be [,] or }.
+   * Suppose it is ] or }. We backtrack to the first character, what could it be that would
+   * not trigger an error? It could be ] or } but no, because you can't start a document that way.
+   * It can't be a comma, a colon or any simple value. So the only way we could continue is
+   * if the repeated character is [. But if so, the document must start with [. But if the document
+   * starts with [, it should end with ]. If we enforce that rule, then we would get
+   * ][[ which is invalid.
+   *
+   * This is illustrated with the test array_iterate_unclosed_error() on the following input:
+   * R"({ "a": [,,)"
+   **/
+  parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final
+  parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len);
+  parser.structural_indexes[parser.n_structural_indexes + 2] = 0;
+  parser.next_structural_index = 0;
+  // a valid JSON file cannot have zero structural indexes - we should have found something
+  if (simdjson_unlikely(parser.n_structural_indexes == 0u)) {
+    return EMPTY;
+  }
+  if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) {
+    return UNEXPECTED_ERROR;
+  }
+  if (partial == stage1_mode::streaming_partial) {
+    // If we have an unclosed string, then the last structural
+    // will be the quote and we want to make sure to omit it.
+    if(have_unclosed_string) {
+      parser.n_structural_indexes--;
+      // a valid JSON file cannot have zero structural indexes - we should have found something
+      if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; }
+    }
+    // We truncate the input to the end of the last complete document (or zero).
+    auto new_structural_indexes = find_next_document_index(parser);
+    if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) {
+      if(parser.structural_indexes[0] == 0) {
+        // If the buffer is partial and we started at index 0 but the document is
+        // incomplete, it's too big to parse.
+        return CAPACITY;
+      } else {
+        // It is possible that the document could be parsed, we just had a lot
+        // of white space.
+        parser.n_structural_indexes = 0;
+        return EMPTY;
+      }
+    }
+
+    parser.n_structural_indexes = new_structural_indexes;
+  } else if (partial == stage1_mode::streaming_final) {
+    if(have_unclosed_string) { parser.n_structural_indexes--; }
+    // We truncate the input to the end of the last complete document (or zero).
+    // Because partial == stage1_mode::streaming_final, it means that we may
+    // silently ignore trailing garbage. Though it sounds bad, we do it
+    // deliberately because many people who have streams of JSON documents
+    // will truncate them for processing. E.g., imagine that you are uncompressing
+    // the data from a size file or receiving it in chunks from the network. You
+    // may not know where exactly the last document will be. Meanwhile the
+    // document_stream instances allow people to know the JSON documents they are
+    // parsing (see the iterator.source() method).
+    parser.n_structural_indexes = find_next_document_index(parser);
+    // We store the initial n_structural_indexes so that the client can see
+    // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes,
+    // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len,
+    // otherwise, it will copy some prior index.
+    parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes];
+    // This next line is critical, do not change it unless you understand what you are
+    // doing.
+    parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len);
+    if (simdjson_unlikely(parser.n_structural_indexes == 0u)) {
+      // We tolerate an unclosed string at the very end of the stream. Indeed, users
+      // often load their data in bulk without being careful and they want us to ignore
+      // the trailing garbage.
+      return EMPTY;
+    }
+  }
+  checker.check_eof();
+  return checker.errors();
+}
+
+} // namespace stage1
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file src/generic/stage1/json_structural_indexer.h */
+/* begin file src/generic/stage1/utf8_validator.h */
+namespace simdjson {
+namespace haswell {
+namespace {
+namespace stage1 {
+
+/**
+ * Validates that the string is actual UTF-8.
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace haswell { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace haswell +} // namespace simdjson + +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* 
end file include/simdjson/haswell/end.h */ +/* end file src/haswell/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* begin file src/ppc64/implementation.cpp */ +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ +/* end file src/ppc64/implementation.cpp */ +/* begin file src/ppc64/dom_parser_implementation.cpp */ +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +// +// Stage 1 +// +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_really_inline uint64_t op() const noexcept { return _op; } + simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 
0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_really_inline bool is_ascii(const simd8x64& input) { + // careful: 0x80 is not ascii. + return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); +} + +simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_really_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + if(simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } if(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_really_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace ppc64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_really_inline size_t block_index(); + simdjson_really_inline bool has_full_block() const; + simdjson_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function returns 0 and leaves dst untouched). In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_really_inline size_t get_remainder(uint8_t *dst) const; + simdjson_really_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ?
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string 
(everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_really_inline uint64_t find_escaped(uint64_t escape); + simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the block so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regehr from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // json_string_block::string_tail() uses ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2.
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a non-quote scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included.
+ */ + simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() {} + simdjson_really_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_really_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_really_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2,
block_2); + reader.advance(); +} + +template<> +simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace ppc64 { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_really_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
+// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// + +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace ppc64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no + // benefit and therefore makes things worse. + // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); 
+} + +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ +/* end file src/ppc64/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* begin file src/westmere/implementation.cpp */ +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +/* begin file include/simdjson/westmere/end.h */ +SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ +/* end file src/westmere/implementation.cpp */ +/* begin file src/westmere/dom_parser_implementation.cpp */ +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_really_inline uint64_t op() const noexcept { return _op; } + simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + 
uint64_t _op; +}; + +simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). 
+ + + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; +} + +simdjson_really_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These 
all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_really_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_really_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + + simdjson_really_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + if(simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } if(simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else if(simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_really_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgamation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace westmere { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_really_inline size_t block_index(); + simdjson_really_inline bool has_full_block() const; + simdjson_really_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function returns 0 without writing anything to dst). In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_really_inline size_t get_remainder(uint8_t *dst) const; + simdjson_really_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ?
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_really_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_really_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of 
string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_really_inline uint64_t find_escaped(uint64_t escape); + simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. +// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. 
+ * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It may reside inside a string. + **/ + simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included.
+ */ + simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() {} + simdjson_really_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_really_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_really_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, 
block_2); + reader.advance(); +} + +// Minifies one 64-byte block: scan it, emit the non-whitespace bytes, then advance the reader. +template<> +simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + // Reuse the block already loaded into in_1 (as step<128> does) rather than implicitly building a second one from block_buf. + this->next(in_1, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + std::memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace westmere { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them).
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_really_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for the input, it's available.
+// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace westmere { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_really_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_really_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_really_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_really_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_really_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_really_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_really_inline bool at_beginning() const noexcept; + simdjson_really_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_really_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_really_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_really_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_really_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { + return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + 
log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_really_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_really_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_really_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_really_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_really_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_really_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts.
*/ + simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_really_inline tape_builder(dom::document &doc) noexcept; + + simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_really_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. 
+ // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + 
tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace westmere +} // namespace simdjson + +/* begin file include/simdjson/westmere/end.h */ 
+SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ +/* end file src/westmere/dom_parser_implementation.cpp */ +#endif + +SIMDJSON_POP_DISABLE_WARNINGS +/* end file src/simdjson.cpp */ diff --git a/simdjson/simdjson_source/simdjson.h b/simdjson/simdjson_source/simdjson.h new file mode 100644 index 0000000..67cb53b --- /dev/null +++ b/simdjson/simdjson_source/simdjson.h @@ -0,0 +1,32135 @@ +/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ +/* begin file include/simdjson.h */ +#ifndef SIMDJSON_H +#define SIMDJSON_H + +/** + * @mainpage + * + * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). + * + * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. + + #include "simdjson.h" + + int main(void) { + // load from `twitter.json` file: + simdjson::dom::parser parser; + simdjson::dom::element tweets = parser.load("twitter.json"); + std::cout << tweets["search_metadata"]["count"] << " results." 
<< std::endl; + + // Parse and iterate through an array of objects + auto abstract_json = R"( [ + { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, + { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } + ] )"_padded; + + for (simdjson::dom::object obj : parser.parse(abstract_json)) { + for(const auto key_value : obj) { + cout << "key: " << key_value.key << " : "; + simdjson::dom::object innerobj = key_value.value; + cout << "a: " << double(innerobj["a"]) << ", "; + cout << "b: " << double(innerobj["b"]) << ", "; + cout << "c: " << int64_t(innerobj["c"]) << endl; + } + } + } + */ + +/* begin file include/simdjson/simdjson_version.h */ +// /include/simdjson/simdjson_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDJSON_SIMDJSON_VERSION_H +#define SIMDJSON_SIMDJSON_VERSION_H + +/** The version of simdjson being used (major.minor.revision) */ +#define SIMDJSON_VERSION 2.0.3 + +namespace simdjson { +enum { + /** + * The major version (MAJOR.minor.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MAJOR = 2, + /** + * The minor version (major.MINOR.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MINOR = 0, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 3 +}; +} // namespace simdjson + +#endif // SIMDJSON_SIMDJSON_VERSION_H +/* end file include/simdjson/simdjson_version.h */ +/* begin file include/simdjson/dom.h */ +#ifndef SIMDJSON_DOM_H +#define SIMDJSON_DOM_H + +/* begin file include/simdjson/base.h */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* begin file include/simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file include/simdjson/compiler_check.h */ +/* begin file include/simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* begin file include/simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +// https://en.wikipedia.org/wiki/C_alternative_tokens +// This header should have no effect, except maybe +// under Visual Studio. 
+#include +#endif + +#if defined(__x86_64__) || defined(_M_AMD64) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 +#else +#define SIMDJSON_IS_32BITS 1 + +// We do not support 32-bit platforms, but it can be +// handy to identify them. +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) + +#ifdef SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ +compiling for a known 64-bit platform. All fast kernels \ +will be disabled and performance may be poor. Please \ +use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. 
+#ifdef SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + +#ifdef SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. 
+// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#ifdef NDEBUG + +#ifdef SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // NDEBUG + +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file include/simdjson/portability.h */ + +namespace simdjson { + +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; + +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. 
+ * + * the input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 32; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). + */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. 
+ #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if defined(SIMDJSON_CLANG_VISUAL_STUDIO) + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + + +#endif // MSC_VER + +#if defined(SIMDJSON_VISUAL_STUDIO) + /** + * Windows 
users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. 
+#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. + +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* begin file include/simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. 
+// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 6 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? 
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if _MSC_VER +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++20 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( std::basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view +to_string_view( std::basic_string const & s ) +{ + return std::basic_string_view( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, 
size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// 
MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || 
nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? ) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? 
) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional 
includes: + +#include +#include +#include +#include +#include // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + +#if ! nssv_CONFIG_NO_EXCEPTIONS +# include +#endif + +#if nssv_CPP11_OR_GREATER +# include +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narow +// - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? 
result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +} // namespace detail + +template +< + class CharT, + class Traits = std::char_traits +> +class basic_string_view; + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & 
operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer 
data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? 
npos + : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it 
) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< 
class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + 
basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< 
class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, 
Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ 
return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !os ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if 
nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + 
+}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! 
nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. + +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file include/simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. 
+#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifndef NDEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS +#endif +#endif + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. + +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file include/simdjson/common_defs.h */ + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +// Public API +/* begin file include/simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. 
+ */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + NUM_ERROR_CODES +}; + +/** + * Get the error message for the given error code. 
+ * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. 
+ */ + simdjson_really_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_really_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_really_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_really_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_really_inline error_code get(T &value) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_really_inline const T &value(error_code &error) const & noexcept; + + /** + * The error. + */ + simdjson_really_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_really_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_really_inline simdjson_result() noexcept; + /** + * @private Create a new error result. + */ + simdjson_really_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new successful result. + */ + simdjson_really_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_really_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_really_inline error_code get(T &value) && noexcept; + + /** + * The error. 
+ */ + simdjson_really_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. 
+ */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file include/simdjson/error.h */ +/* begin file include/simdjson/minify.h */ +#ifndef SIMDJSON_MINIFY_H +#define SIMDJSON_MINIFY_H + +/* begin file include/simdjson/padded_string.h */ +#ifndef SIMDJSON_PADDED_STRING_H +#define SIMDJSON_PADDED_STRING_H + +#include +#include +#include +#include + +namespace simdjson { + +class padded_string_view; + +/** + * String with extra allocation for ease of use with parser::parse() + * + * This is a move-only class, it cannot be copied. + */ +struct padded_string final { + + /** + * Create a new, empty padded string. + */ + explicit inline padded_string() noexcept; + /** + * Create a new padded string buffer. + * + * @param length the size of the string. + */ + explicit inline padded_string(size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param data the buffer to copy + * @param length the number of bytes to copy + */ + explicit inline padded_string(const char *data, size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param str_ the string to copy + */ + inline padded_string(const std::string & str_ ) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param sv_ the string to copy + */ + inline padded_string(std::string_view sv_) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string(padded_string &&o) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. 
+ */ + inline padded_string &operator=(padded_string &&o) noexcept; + inline void swap(padded_string &o) noexcept; + ~padded_string() noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t size() const noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t length() const noexcept; + + /** + * The string data. + **/ + const char *data() const noexcept; + const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} + + /** + * The string data. + **/ + char *data() noexcept; + + /** + * Create a std::string_view with the same content. + */ + operator std::string_view() const; + + /** + * Create a padded_string_view with the same content. + */ + operator padded_string_view() const noexcept; + + /** + * Load this padded string from a file. + * + * @return IO_ERROR on error. Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * + * @param path the path to the file. + **/ + inline static simdjson_result load(std::string_view path) noexcept; + +private: + padded_string &operator=(const padded_string &o) = delete; + padded_string(const padded_string &o) = delete; + + size_t viable_size{0}; + char *data_ptr{nullptr}; + +}; // padded_string + +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +// This is deliberately outside of simdjson so that people get it without having to use the namespace +inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { + return simdjson::padded_string(str, len); +} + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). +inline char *allocate_padded_buffer(size_t length) noexcept; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_H +/* end file include/simdjson/padded_string.h */ +#include +#include +#include + +namespace simdjson { + + + +/** + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * This function is much faster than parsing a JSON string and then writing a minified version of it. + * However, it does not validate the input. It will merely return an error in simple cases (e.g., if + * there is a string that was never terminated). + * + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. 
+ */ +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; + +} // namespace simdjson + +#endif // SIMDJSON_MINIFY_H +/* end file include/simdjson/minify.h */ +/* begin file include/simdjson/padded_string_view.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_H +#define SIMDJSON_PADDED_STRING_VIEW_H + + +#include +#include +#include +#include + +namespace simdjson { + +/** + * User-provided string that promises it has extra padded bytes at the end for use with parser::parse(). + */ +class padded_string_view : public std::string_view { +private: + size_t _capacity; + +public: + /** Create an empty padded_string_view. */ + inline padded_string_view() noexcept = default; + + /** + * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param len The length of the string (not including padding). + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; + /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ + explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; + + /** + * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * The capacity of the string will be used to determine its padding. + * + * @param s The string. + */ + explicit inline padded_string_view(const std::string &s) noexcept; + + /** + * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; + + /** The number of allocated bytes. 
*/ + inline size_t capacity() const noexcept; + + /** The amount of padding on the string (capacity() - length()) */ + inline size_t padding() const noexcept; + +}; // padded_string_view + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string_view. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_H +/* end file include/simdjson/padded_string_view.h */ +/* begin file include/simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* begin file include/simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. 
+ return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming Whether this is being called by parser::parse_many. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. 
+ */ + simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Change the capacity of this parser. + * + * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) + * and an CAPACITY error is returned if it is attempted. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_really_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. 
+ */ + simdjson_really_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors. + simdjson_really_inline dom_parser_implementation() noexcept; + simdjson_really_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_really_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_really_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_really_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_really_inline size_t dom_parser_implementation::max_depth() const noexcept { + return 
_max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file include/simdjson/internal/dom_parser_implementation.h */ +/* begin file include/simdjson/internal/isadetection.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. 
Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_ISADETECTION_H +#define SIMDJSON_INTERNAL_ISADETECTION_H + +#include +#include +#if defined(_MSC_VER) +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif + +namespace simdjson { +namespace internal { + + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000 +}; + +#if defined(__PPC64__) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; +} + +#elif defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 + +#if defined(__ARM_NEON) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::NEON; +} + +#else // ARM without NEON + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + +#endif + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + + +namespace { +// Can be found on Intel ISA Reference for CPUID +constexpr uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512ifma_bit = 1 << 21; ///< @private bit 21 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vl_bit = 1 << 31; ///< @private bit 31 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 +constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 +constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 +} + + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuid(cpu_info, *eax); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + 
__get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif +} + +static inline uint32_t detect_supported_architectures() { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |= instruction_set::BMI1; + } + + if (ebx & cpuid_bmi2_bit) { + host_isa |= instruction_set::BMI2; + } + + if (ebx & cpuid_avx512f_bit) { + host_isa |= instruction_set::AVX512F; + } + + if (ebx & cpuid_avx512dq_bit) { + host_isa |= instruction_set::AVX512DQ; + } + + if (ebx & cpuid_avx512ifma_bit) { + host_isa |= instruction_set::AVX512IFMA; + } + + if (ebx & cpuid_avx512pf_bit) { + host_isa |= instruction_set::AVX512PF; + } + + if (ebx & cpuid_avx512er_bit) { + host_isa |= instruction_set::AVX512ER; + } + + if (ebx & cpuid_avx512cd_bit) { + host_isa |= instruction_set::AVX512CD; + } + + if (ebx & cpuid_avx512bw_bit) { + host_isa |= instruction_set::AVX512BW; + } + + if (ebx & cpuid_avx512vl_bit) { + host_isa |= instruction_set::AVX512VL; + } + + if (ecx & cpuid_avx512vbmi2_bit) { + host_isa |= instruction_set::AVX512VBMI2; + } + + // EBX for EAX=0x1 + eax = 0x1; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_sse42_bit) { + host_isa |= instruction_set::SSE42; + } + + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= instruction_set::PCLMULQDQ; + } + + return host_isa; +} +#else // fallback + + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + + +#endif // end SIMD extension detection code + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ISADETECTION_H +/* end file include/simdjson/internal/isadetection.h */ +#include +#include +#include + +namespace simdjson { + +/** + 
* Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; + + +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param p the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +namespace dom { + class document; +} // namespace dom + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual const std::string &name() const { return _name; } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. 
"haswell", "westmere", "arm64" + */ + virtual const std::string &description() const { return _description; } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * + * @return true if the implementation can be safely used on the current system (determined at runtime) + */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. 
+ * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_really_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; + +private: + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_really_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. 
+ * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!imp->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. 
+ */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file include/simdjson/implementation.h */ + +// Inline functions +/* begin file include/simdjson/error-inl.h */ +#ifndef SIMDJSON_INLINE_ERROR_H +#define SIMDJSON_INLINE_ERROR_H + +#include +#include +#include + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. 
+ return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base inline implementation +// + +template +simdjson_really_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_really_inline error_code simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_really_inline error_code simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_really_inline T& simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_really_inline T&& simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_really_inline T&& simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_really_inline simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_really_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_really_inline T&& 
simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair(std::forward(value), error) {} +template +simdjson_really_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template +simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward(value), SUCCESS) {} +template +simdjson_really_inline simdjson_result_base::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result inline implementation +/// + +template +simdjson_really_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { + std::forward>(*this).tie(value, error); +} + +template +simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) && noexcept { + return std::forward>(*this).get(value); +} + +template +simdjson_really_inline error_code simdjson_result::error() const noexcept { + return internal::simdjson_result_base::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_really_inline T& simdjson_result::value() & noexcept(false) { + return internal::simdjson_result_base::value(); +} + +template +simdjson_really_inline T&& simdjson_result::value() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_really_inline T&& simdjson_result::take_value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_really_inline simdjson_result::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_really_inline const T& simdjson_result::value_unsafe() const& noexcept { + return internal::simdjson_result_base::value_unsafe(); +} + +template 
+simdjson_really_inline T&& simdjson_result::value_unsafe() && noexcept { + return std::forward>(*this).value_unsafe(); +} + +template +simdjson_really_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base(std::forward(value), error) {} +template +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +template +simdjson_really_inline simdjson_result::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +template +simdjson_really_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_ERROR_H +/* end file include/simdjson/error-inl.h */ +/* begin file include/simdjson/padded_string-inl.h */ +#ifndef SIMDJSON_INLINE_PADDED_STRING_H +#define SIMDJSON_INLINE_PADDED_STRING_H + + +#include +#include +#include +#include + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). +inline char *allocate_padded_buffer(size_t length) noexcept { + const size_t totalpaddedlength = length + SIMDJSON_PADDING; + if(totalpaddedlength(1UL<<20)) { + return nullptr; + } +#endif + + char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; + if (padded_buffer == nullptr) { + return nullptr; + } + // We write zeroes in the padded region to avoid having uninitized + // garbage. If nothing else, garbage getting read might trigger a + // warning in a memory checking. 
+ std::memset(padded_buffer + length, 0, totalpaddedlength - length); + return padded_buffer; +} // allocate_padded_buffer() + +} // namespace internal + + +inline padded_string::padded_string() noexcept {} +inline padded_string::padded_string(size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { +} +inline padded_string::padded_string(const char *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, data, length); + } +} +// note: do not pass std::string arguments by value +inline padded_string::padded_string(const std::string & str_ ) noexcept + : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { + if (data_ptr != nullptr) { + std::memcpy(data_ptr, str_.data(), str_.size()); + } +} +// note: do pass std::string_view arguments by value +inline padded_string::padded_string(std::string_view sv_) noexcept + : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { + if(simdjson_unlikely(!data_ptr)) { + //allocation failed or zero size + viable_size=0; + return; + } + if (sv_.size()) { + std::memcpy(data_ptr, sv_.data(), sv_.size()); + } +} +inline padded_string::padded_string(padded_string &&o) noexcept + : viable_size(o.viable_size), data_ptr(o.data_ptr) { + o.data_ptr = nullptr; // we take ownership +} + +inline padded_string &padded_string::operator=(padded_string &&o) noexcept { + delete[] data_ptr; + data_ptr = o.data_ptr; + viable_size = o.viable_size; + o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; + return *this; +} + +inline void padded_string::swap(padded_string &o) noexcept { + size_t tmp_viable_size = viable_size; + char *tmp_data_ptr = data_ptr; + viable_size = o.viable_size; + data_ptr = o.data_ptr; + o.data_ptr = tmp_data_ptr; + o.viable_size = tmp_viable_size; +} + +inline 
padded_string::~padded_string() noexcept { + delete[] data_ptr; +} + +inline size_t padded_string::size() const noexcept { return viable_size; } + +inline size_t padded_string::length() const noexcept { return viable_size; } + +inline const char *padded_string::data() const noexcept { return data_ptr; } + +inline char *padded_string::data() noexcept { return data_ptr; } + +inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } + +inline padded_string::operator padded_string_view() const noexcept { + return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); +} + +inline simdjson_result padded_string::load(std::string_view filename) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(filename.data(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + if(std::fseek(fp, 0, SEEK_END) < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + __int64 llen = _ftelli64(fp); + if(llen == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long llen = std::ftell(fp); + if((llen < 0) || (llen == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Allocate the padded_string + size_t len = static_cast(llen); + padded_string s(len); + if (s.data() == nullptr) { + std::fclose(fp); + return MEMALLOC; + } + + // Read the padded_string + std::rewind(fp); + size_t bytes_read = std::fread(s.data(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != len) { + return IO_ERROR; + } + + return s; +} + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_PADDED_STRING_H +/* end file include/simdjson/padded_string-inl.h */ +/* begin file include/simdjson/padded_string_view-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H +#define 
SIMDJSON_PADDED_STRING_VIEW_INL_H + + +#include +#include +#include +#include + +namespace simdjson { + +inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept + : std::string_view(s, len), _capacity(capacity) +{ +} + +inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} + +inline padded_string_view::padded_string_view(const std::string &s) noexcept + : std::string_view(s), _capacity(s.capacity()) +{ +} + +inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept + : std::string_view(s), _capacity(capacity) +{ +} + +inline size_t padded_string_view::capacity() const noexcept { return _capacity; } + +inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_INL_H +/* end file include/simdjson/padded_string_view-inl.h */ + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_BASE_H +/* end file include/simdjson/base.h */ + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +/* begin file include/simdjson/dom/array.h */ +#ifndef SIMDJSON_DOM_ARRAY_H +#define SIMDJSON_DOM_ARRAY_H + +/* begin file include/simdjson/internal/tape_ref.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_REF_H +#define SIMDJSON_INTERNAL_TAPE_REF_H + +/* begin file include/simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape. 
+ */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file include/simdjson/internal/tape_type.h */ + +namespace simdjson { + +namespace dom { + class document; +} + +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + +/** + * A reference to an element on the tape. Internal only. + */ +class tape_ref { +public: + simdjson_really_inline tape_ref() noexcept; + simdjson_really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; + inline size_t after_element() const noexcept; + simdjson_really_inline tape_type tape_ref_type() const noexcept; + simdjson_really_inline uint64_t tape_value() const noexcept; + simdjson_really_inline bool is_double() const noexcept; + simdjson_really_inline bool is_int64() const noexcept; + simdjson_really_inline bool is_uint64() const noexcept; + simdjson_really_inline bool is_false() const noexcept; + simdjson_really_inline bool is_true() const noexcept; + simdjson_really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. + simdjson_really_inline uint32_t matching_brace_index() const noexcept; + simdjson_really_inline uint32_t scope_count() const noexcept; + template + simdjson_really_inline T next_tape_value() const noexcept; + simdjson_really_inline uint32_t get_string_length() const noexcept; + simdjson_really_inline const char * get_c_str() const noexcept; + inline std::string_view get_string_view() const noexcept; + simdjson_really_inline bool is_document_root() const noexcept; + + /** The document this element references. 
*/ + const dom::document *doc; + + /** The index of this element on `doc.tape[]` */ + size_t json_index; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_REF_H +/* end file include/simdjson/internal/tape_ref.h */ + +namespace simdjson { + +namespace internal { +template +class string_builder; +} +namespace dom { + +class document; +class element; + +/** + * JSON array. + */ +class array { +public: + /** Create a new, invalid array */ + simdjson_really_inline array() noexcept; + + class iterator { + public: + using value_type = element; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual value + */ + inline value_type operator*() const noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator& operator++() noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class array; + }; + + /** + * Return the first array element. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last array element. + * + * Part of the std::iterable interface. 
+ */ + inline iterator end() const noexcept; + /** + * Get the size of the array (number of immediate children). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). + * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. + **/ + inline size_t number_of_slots() const noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); + * a.at_pointer("/0/foo/a/1") == 20 + * a.at_pointer("0")["foo"]["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity and + * is equivalent to the following: + * + * size_t i=0; + * for (auto element : *this) { + * if (i == index) { return element; } + * i++; + * } + * return INDEX_OUT_OF_BOUNDS; + * + * Avoid calling the at() function repeatedly. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + +private: + simdjson_really_inline array(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + + +} // namespace dom + +/** The result of a JSON conversion that may fail. */ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result() noexcept; ///< @private + simdjson_really_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at(size_t index) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::array::iterator begin() const noexcept(false); + inline dom::array::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_ARRAY_H +/* end file include/simdjson/dom/array.h */ +/* begin file include/simdjson/dom/document_stream.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_H +#define SIMDJSON_DOCUMENT_STREAM_H + +/* begin file include/simdjson/dom/parser.h */ +#ifndef SIMDJSON_DOM_PARSER_H +#define SIMDJSON_DOM_PARSER_H + +/* begin file include/simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + 
+#include +#include + +namespace simdjson { +namespace dom { + +class element; + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. + * + * The memory allocation is strict: you + * can use this function to increase + * or lower the amount of allocated memory. + * Passing zero clears the memory. + */ + error_code allocate(size_t len) noexcept; + /** @private Capacity in bytes, in terms + * of how many bytes of input JSON we can + * support.
+ */ + size_t capacity() const noexcept; + + +private: + size_t allocated_capacity{0}; + friend class parser; +}; // class document + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file include/simdjson/dom/document.h */ +#include +#include +#include + +namespace simdjson { + +namespace dom { + +class document_stream; +class element; + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +/** + * A persistent document parser. + * + * The parser is designed to be reused, holding the internal buffers necessary to do parsing, + * as well as memory for a single document. The parsed document is overwritten on each parse. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + * + * @note Moving a parser instance may invalidate "dom::element" instances. If you need to + * preserve both the "dom::element" instances and the parser, consider wrapping the parser + * instance in a std::unique_ptr instance: + * + * std::unique_ptr parser(new dom::parser{}); + * auto error = parser->load(f).get(root); + * + * You can then move std::unique_ptr safely. + * + * @note This is not thread safe: one parser cannot produce two documents at the same time! 
+ */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + * + * @param max_capacity The maximum document length the parser can automatically handle. The parser + * will allocate more capacity on an as needed basis (when it sees documents too big to handle) + * up to this amount. The parser still starts with zero capacity no matter what this number is: + * to allocate an initial capacity, call allocate() after constructing the parser. + * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). + */ + simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. + */ + simdjson_really_inline parser(parser &&other) noexcept; + parser(const parser &) = delete; ///< @private Disallow copying + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. + */ + simdjson_really_inline parser &operator=(parser &&other) noexcept; + parser &operator=(const parser &) = delete; ///< @private Disallow copying + + /** Deallocate the JSON parser. */ + ~parser()=default; + + /** + * Load a JSON document from a file and return a reference to it. + * + * dom::parser parser; + * const element doc = parser.load("jsonexamples/twitter.json"); + * + * The function is eager: the file's content is loaded in memory inside the parser instance + * and immediately parsed. The file can be deleted after the `parser.load` call. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. 
You may store + * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like + * so: `std::unique_ptr parser(new dom::parser{});`. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than the file length, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param path The path to load. + * @return The document, or an error: + * - IO_ERROR if there was an error opening or reading the file. + * Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load(const std::string &path) & noexcept; + inline simdjson_result load(const std::string &path) && = delete ; + /** + * Parse a JSON document and return a temporary reference to it. + * + * dom::parser parser; + * element doc_root = parser.parse(buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse(buf, len)` call has completed. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. You may store + * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like + * so: `std::unique_ptr parser(new dom::parser{});`. 
+ * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, + * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(json, json_len); + * + * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), + * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. + * The benefit of setting realloc_if_needed to false is that you avoid a temporary + * memory allocation and a copy. + * + * The padded bytes may be read. It is not important how you initialize + * these bytes though we recommend a sensible default like null character values or spaces. + * For example, the following low-level code is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; + * std::memcpy(padded_json_copy.get(), json, json_len); + * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); + * + * ### Parser Capacity + * + * If the parser's current capacity is less than len, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. 
+ * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse(const std::string &s) & noexcept; + simdjson_really_inline simdjson_result parse(const std::string &s) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse(const padded_string &s) & noexcept; + simdjson_really_inline simdjson_result parse(const padded_string &s) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_really_inline simdjson_result parse(const char *buf) noexcept = delete; + + /** + * Parse a JSON document into a provided document instance and return a temporary reference to it.
+ * It is similar to the function `parse` except that instead of parsing into the internal + * `document` instance associated with the parser, it allows the user to provide a document + * instance. + * + * dom::parser parser; + * dom::document doc; + * element doc_root = parser.parse_into_document(doc, buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse_into_document(doc, buf, len)` call has completed. + * + * ### IMPORTANT: Document Lifetime + * + * After the call to parse_into_document, the parser is no longer needed. + * + * The JSON document lives in the document instance: you must keep the document + * instance alive while you navigate through it (i.e., use the returned value from + * parse_into_document). You are encouraged to reuse the document instance + * many times with new data to avoid reallocations: + * + * dom::document doc; + * element doc_root1 = parser.parse_into_document(doc, buf1, len); + * //... doc_root1 is a pointer inside doc + * element doc_root2 = parser.parse_into_document(doc, buf2, len); + * //... doc_root2 is a pointer inside doc + * // at this point doc_root1 is no longer safe + * + * Moving the document instance is safe, but it invalidates the element instances. After + * moving a document, you can recover safe access to the document root with its `root()` method. + * + * @param doc The document instance where the parsed data will be stored (on success). + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. + * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails.
+ * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; + simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; + simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; + + /** + * Load a file containing many JSON documents. 
+ * + * dom::parser parser; + * for (const element doc : parser.load_many(path)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` + * function has returned. The memory is held by the `parser` instance. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document may have been parsed when the `parser.load_many(path)` function + * returned. + * + * ### Format + * + * The file must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error.
+ * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param path File name pointing at the concatenated JSON to parse. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet + * spot in our tests. + * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE + * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - IO_ERROR if there was an error opening or reading the file. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */ + inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + + /** + * Parse a buffer containing many JSON documents. + * + * dom::parser parser; + * for (element doc : parser.parse_many(buf, len)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document may have been parsed when the `parser.parse_many(buf, len)` function + * returned. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. In particular, the following is unsafe and will not compile: + * + * auto docs = parser.parse_many("[\"temporary data\"]"_padded); + * // here the string "[\"temporary data\"]" may no longer exist in memory + * // the parser instance may not have even accessed the input yet + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The following is safe: + * + * auto json = "[\"temporary data\"]"_padded; + * auto docs = parser.parse_many(json); + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
+ * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error. + * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.parse_many(buf, len).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error.
Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. 
+ */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * @private deprecated because it returns bool instead of error_code, which is our standard for + * failures. Use allocate() instead. + * + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return true if successful, false if allocation failed. + */ + [[deprecated("Use allocate() instead.")]] + simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_really_inline size_t capacity() const noexcept; + + /** + * The largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount. + * + * @return Maximum capacity, in bytes. + */ + simdjson_really_inline size_t max_capacity() const noexcept; + + /** + * The maximum level of nested objects and arrays supported by this parser. + * + * @return Maximum depth, in levels of nesting. + */ + simdjson_really_inline size_t max_depth() const noexcept; + + /** + * Set max_capacity. This is the largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount as documents are passed + * to it. + * + * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, + * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, + * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY.
+ * + * This call will not allocate or deallocate, even if capacity is currently above max_capacity. + * + * @param max_capacity The new maximum capacity, in bytes. + */ + simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; + +#ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. Threading is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; +#endif + /** @private Use the new DOM API instead */ + class Iterator; + /** @private Use simdjson_error instead */ + using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; + + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + + /** @private Use `if (parser.parse(...).error())` instead */ + bool valid{false}; + /** @private Use `parser.parse(...).error()` instead */ + error_code error{UNINITIALIZED}; + + /** @private Use `parser.parse(...).value()` instead */ + document doc{}; + + /** @private returns true if the document parsed was valid */ + [[deprecated("Use the result of parser.parse() instead")]] + inline bool is_valid() const noexcept; + + /** + * @private return an error code corresponding to the last parsing attempt (see + * simdjson.h); will return UNINITIALIZED if no parsing was attempted + */ + [[deprecated("Use the result of parser.parse() instead")]] + inline int get_error_code() const noexcept; + + /** @private return the string equivalent of "get_error_code" */ + [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] + inline std::string get_error_message() const noexcept; + + /** @private */ + [[deprecated("Use cout << on the result of parser.parse() instead")]] + inline bool print_json(std::ostream &os) const noexcept; + + /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()`
instead */ + inline bool dump_raw_tape(std::ostream &os) const noexcept; + + +private: + /** + * The maximum document length this parser will automatically support. + * + * The parser will not be automatically allocated above this amount. + */ + size_t _max_capacity; + + /** + * The loaded buffer (reused each time load() is called) + */ + std::unique_ptr loaded_bytes; + + /** Capacity of loaded_bytes buffer. */ + size_t _loaded_bytes_capacity{0}; + + // all nodes are stored on the doc.tape using a 64-bit word. + // + // strings, doubles and ints are stored as + // a 64-bit word with a pointer to the actual value + // + // + // + // for objects or arrays, store [ or { at the beginning and } and ] at the + // end. For the openings ([ or {), we annotate them with a reference to the + // location on the doc.tape of the end, and for the closings (} and ]), we + // annotate them with a reference to the location of the opening + // + // + + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * internal document. + */ + inline error_code ensure_capacity(size_t desired_capacity) noexcept; + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * provided document.
+ */ + inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; + + /** Read the file into loaded_bytes */ + inline simdjson_result read_file(const std::string &path) noexcept; + + friend class parser::Iterator; + friend class document_stream; + + +}; // class parser + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSER_H +/* end file include/simdjson/dom/parser.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace dom { + + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + dom::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. 
+ */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; +}; +#endif + +/** + * A forward-only stream of documents. + * + * Produced by parser::parse_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * error = parser.parse_many(json).get(docs); + * ``` + */ + simdjson_really_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_really_inline ~document_stream() noexcept; + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.parse_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + /** + * An iterator through a forward-only stream of documents. + */ + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_really_inline iterator() noexcept; + /** + * Get the current document (or error). 
+ */ + simdjson_really_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_really_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_really_inline size_t current_index() const noexcept; + /** + * @private + * + * Gives a view of the current document. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_really_inline std::string_view source() const noexcept; + + private: + simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + friend class document_stream; + }; + + /** + * Start iterating the documents in the stream.
+ */ + simdjson_really_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_really_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be greater than or equal to the size of the largest JSON document) + */ + simdjson_really_inline document_stream( + dom::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it; UTF-8 BOMs are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error.
+ * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth + * (the simdjson::error_message function converts these error codes into a string). NOTE(review): next() is declared void, so these codes are presumably stored in the `error` member rather than returned -- confirm. + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** + * Pass the next batch through stage 1 and return when finished. + * When threads are enabled, this may wait for the stage 1 thread to finish. + */ + inline void load_batch() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; + + dom::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; +#ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + friend struct stage1_worker; + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished.
+ */ + dom::parser stage1_thread_parser{}; +#endif // SIMDJSON_THREADS_ENABLED + + friend class dom::parser; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; + +}; // class document_stream + +} // namespace dom + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result() noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_really_inline dom::document_stream::iterator end() noexcept(false); +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_really_inline dom::document_stream::iterator begin() noexcept; + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_really_inline dom::document_stream::iterator end() noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS +}; // struct simdjson_result + +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_STREAM_H +/* end file include/simdjson/dom/document_stream.h */ +/* begin file include/simdjson/dom/element.h */ +#ifndef SIMDJSON_DOM_ELEMENT_H +#define SIMDJSON_DOM_ELEMENT_H + +#include + +namespace simdjson { +namespace internal { +template +class string_builder; +} +namespace dom { +class array; +class document; +class object; + +/** + * The actual concrete type of a JSON element + * This is the type it is most easily cast to with get<>.
+ */ +enum class element_type { + ARRAY = '[', ///< dom::array + OBJECT = '{', ///< dom::object + INT64 = 'l', ///< int64_t + UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t + DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. + STRING = '"', ///< std::string_view + BOOL = 't', ///< bool + NULL_VALUE = 'n' ///< null +}; + +/** + * A JSON element. + * + * References an element in a JSON document, representing a JSON null, boolean, string, number, + * array or object. + */ +class element { +public: + /** Create a new, invalid element. */ + simdjson_really_inline element() noexcept; + + /** The type of this element. */ + simdjson_really_inline element_type type() const noexcept; + + /** + * Cast this element to an array. + * + * @returns An object that can be used to iterate the array, or: + * INCORRECT_TYPE if the JSON element is not an array. + */ + inline simdjson_result get_array() const noexcept; + /** + * Cast this element to an object. + * + * @returns An object that can be used to look up or iterate the object's fields, or: + * INCORRECT_TYPE if the JSON element is not an object. + */ + inline simdjson_result get_object() const noexcept; + /** + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will + * be invalidated the next time it parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string.
+ */ + inline simdjson_result get_c_str() const noexcept; + /** + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it + * parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string() const noexcept; + /** + * Cast this element to a signed integer. + * + * @returns A signed 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is too large to fit in a signed 64-bit integer. + */ + inline simdjson_result get_int64() const noexcept; + /** + * Cast this element to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is negative. + */ + inline simdjson_result get_uint64() const noexcept; + /** + * Cast this element to a double floating-point. + * + * @returns A double value. + * Returns INCORRECT_TYPE if the JSON element is not a number. + */ + inline simdjson_result get_double() const noexcept; + /** + * Cast this element to a bool. + * + * @returns A bool value. + * Returns INCORRECT_TYPE if the JSON element is not a boolean. + */ + inline simdjson_result get_bool() const noexcept; + + /** + * Whether this element is a json array. + * + * Equivalent to is() with T = dom::array. + */ + inline bool is_array() const noexcept; + /** + * Whether this element is a json object. + * + * Equivalent to is() with T = dom::object.
+ */ + inline bool is_object() const noexcept; + /** + * Whether this element is a json string. + * + * Equivalent to is() with T = std::string_view or T = const char *. + */ + inline bool is_string() const noexcept; + /** + * Whether this element is a json number that fits in a signed 64-bit integer. + * + * Equivalent to is() with T = int64_t. + */ + inline bool is_int64() const noexcept; + /** + * Whether this element is a json number that fits in an unsigned 64-bit integer. + * + * Equivalent to is() with T = uint64_t. + */ + inline bool is_uint64() const noexcept; + /** + * Whether this element is a json number that fits in a double. + * + * Equivalent to is() with T = double. + */ + inline bool is_double() const noexcept; + + /** + * Whether this element is a json number. + * + * Both integers and floating points will return true. + */ + inline bool is_number() const noexcept; + + /** + * Whether this element is a json `true` or `false`. + * + * Equivalent to is() with T = bool. + */ + inline bool is_bool() const noexcept; + /** + * Whether this element is a json `null`. + */ + inline bool is_null() const noexcept; + + /** + * Tell whether the value can be cast to the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + */ + template + simdjson_really_inline bool is() const noexcept; + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array() or get_string() instead.
+ * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @returns The value cast to the given type, or: + * INCORRECT_TYPE if the value cannot be cast to the given type. + */ + + template + inline simdjson_result get() const noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the value. May not be set if there is an error. + * + * @returns The error that occurred, or SUCCESS if there was no error. + */ + template + simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; + + /** + * Get the value as the provided type (T), setting error if it's not the given type. + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the given type. value is undefined if there is an error. + * @param error The variable to store the error. error is set to error_code::SUCCESS if there is no error. + */ + template + inline void tie(T &value, error_code &error) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Read this element as a boolean. + * + * @return The boolean value + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean.
+ */ + inline operator bool() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline explicit operator const char*() const noexcept(false); + + /** + * Read this element as a UTF-8 string (as a std::string_view). + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline operator std::string_view() const noexcept(false); + + /** + * Read this element as an unsigned integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator uint64_t() const noexcept(false); + /** + * Read this element as a signed integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits + */ + inline operator int64_t() const noexcept(false); + /** + * Read this element as a double. + * + * @return The double value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative (NOTE(review): this line looks copied from the integer conversions; get_double() documents only INCORRECT_TYPE -- confirm) + */ + inline operator double() const noexcept(false); + /** + * Read this element as a JSON array. + * + * @return The JSON array.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline operator array() const noexcept(false); + /** + * Read this element as a JSON object (key/value pairs). + * + * @return The JSON object. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object + */ + inline operator object() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The beginning of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator begin() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The end of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key.
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at_pointer("/foo/a/1") == 20 + * doc.at_pointer("/foo")["a"].at(1) == 20 + * doc.at_pointer("")["foo"]["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * + * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard + * and allowed the following: + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at("foo/a/1") == 20 + * + * Though it is intuitive, it is not compliant with RFC 6901 + * https://tools.ietf.org/html/rfc6901 + * + * For standard compliance, use the at_pointer function instead.
+ * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + inline simdjson_result at(const std::string_view json_pointer) const noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + /** + * Get the value at the given index. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * + * Note: The key will be matched against **unescaped** JSON. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + + /** @private for debugging. Prints out the root element.
*/ + inline bool dump_raw_tape(std::ostream &out) const noexcept; + +private: + simdjson_really_inline element(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class document; + friend class object; + friend class array; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; + +}; + +} // namespace dom + +/** The result of a JSON navigation that may fail. */ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result() noexcept; ///< @private + simdjson_really_inline simdjson_result(dom::element &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result type() const noexcept; + template + simdjson_really_inline bool is() const noexcept; + template + simdjson_really_inline simdjson_result get() const noexcept; + template + simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; + + simdjson_really_inline simdjson_result get_array() const noexcept; + simdjson_really_inline simdjson_result get_object() const noexcept; + simdjson_really_inline simdjson_result get_c_str() const noexcept; + simdjson_really_inline simdjson_result get_string_length() const noexcept; + simdjson_really_inline simdjson_result get_string() const noexcept; + simdjson_really_inline simdjson_result get_int64() const noexcept; + simdjson_really_inline simdjson_result get_uint64() const noexcept; + simdjson_really_inline simdjson_result get_double() const noexcept; + simdjson_really_inline simdjson_result get_bool() const noexcept; + + simdjson_really_inline bool is_array() const noexcept; + simdjson_really_inline bool is_object() const noexcept; + simdjson_really_inline bool is_string() const noexcept; + simdjson_really_inline bool is_int64() const noexcept; + simdjson_really_inline bool is_uint64() const noexcept; +
simdjson_really_inline bool is_double() const noexcept; + simdjson_really_inline bool is_number() const noexcept; + simdjson_really_inline bool is_bool() const noexcept; + simdjson_really_inline bool is_null() const noexcept; + + simdjson_really_inline simdjson_result operator[](std::string_view key) const noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_really_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + simdjson_really_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_really_inline simdjson_result at(size_t index) const noexcept; + simdjson_really_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_really_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator bool() const noexcept(false); + simdjson_really_inline explicit operator const char*() const noexcept(false); + simdjson_really_inline operator std::string_view() const noexcept(false); + simdjson_really_inline operator uint64_t() const noexcept(false); + simdjson_really_inline operator int64_t() const noexcept(false); + simdjson_really_inline operator double() const noexcept(false); + simdjson_really_inline operator dom::array() const noexcept(false); + simdjson_really_inline operator dom::object() const noexcept(false); + + simdjson_really_inline dom::array::iterator begin() const noexcept(false); + simdjson_really_inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + +} // namespace simdjson + +#endif // SIMDJSON_DOM_ELEMENT_H +/* end file include/simdjson/dom/element.h */ +/* begin file include/simdjson/dom/object.h */ +#ifndef SIMDJSON_DOM_OBJECT_H +#define SIMDJSON_DOM_OBJECT_H + +
+namespace simdjson { +namespace internal { +template +class string_builder; +} +namespace dom { + +class document; +class element; +class key_value_pair; + +/** + * JSON object. + */ +class object { +public: + /** Create a new, invalid object */ + simdjson_really_inline object() noexcept; + + class iterator { + public: + using value_type = key_value_pair; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual key/value pair + */ + inline const value_type operator*() const noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator& operator++() noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline std::string_view key() const noexcept; + /** + * Get the length (in bytes) of the key in this key/value pair. + * You should expect this function to be faster than key().size(). + */ + inline uint32_t key_length() const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view. + */ + inline bool key_equals(std::string_view o) const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view in a case-insensitive manner. + * Case comparisons may only be handled correctly for ASCII strings. 
+ */ + inline bool key_equals_case_insensitive(std::string_view o) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline const char *key_c_str() const noexcept; + /** + * Get the value of this key/value pair. + */ + inline element value() const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class object; + }; + + /** + * Return the first key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the object (number of keys). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. 
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("/foo/a/1") == 20 + * obj.at_pointer("/foo")["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * obj.at_pointer("/")["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value associated with the given key. 
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * It is only guaranteed to work over ASCII inputs. + * + * Note: The key will be matched against **unescaped** JSON. + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +private: + simdjson_really_inline object(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + +/** + * Key/value pair in an object. + */ +class key_value_pair { +public: + /** key in the key-value pair **/ + std::string_view key; + /** value in the key-value pair **/ + element value; + +private: + simdjson_really_inline key_value_pair(std::string_view _key, element _value) noexcept; + friend class object; +}; + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_really_inline simdjson_result() noexcept; ///< @private + simdjson_really_inline simdjson_result(dom::object value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result operator[](std::string_view key) const noexcept; + inline simdjson_result operator[](const char *key) const noexcept; + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_key(std::string_view key) const noexcept; + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::object::iterator begin() const noexcept(false); + inline dom::object::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_OBJECT_H +/* end file include/simdjson/dom/object.h */ +/* begin file include/simdjson/dom/serialization.h */ +#ifndef SIMDJSON_SERIALIZATION_H +#define SIMDJSON_SERIALIZATION_H + +#include + +namespace simdjson { + +/** + * The string_builder template and mini_formatter class + * are not part of our public API and are subject to change + * at any time! + */ +namespace internal { + +class mini_formatter; + +/** + * @private The string_builder template allows us to construct + * a string from a document element. It is parametrized + * by a "formatter" which handles the details. 
Thus + * the string_builder template could support both minification + * and prettification, and various other tradeoffs. + */ +template +class string_builder { +public: + /** Construct an initially empty builder, would print the empty string **/ + string_builder() = default; + /** Append an element to the builder (to be printed) **/ + inline void append(simdjson::dom::element value); + /** Append an array to the builder (to be printed) **/ + inline void append(simdjson::dom::array value); + /** Append an object to the builder (to be printed) **/ + inline void append(simdjson::dom::object value); + /** Reset the builder (so that it would print the empty string) **/ + simdjson_really_inline void clear(); + /** + * Get access to the string. The string_view is owned by the builder + * and it is invalid to use it after the string_builder has been + * destroyed. + * However you can make a copy of the string_view on memory that you + * own. + */ + simdjson_really_inline std::string_view str() const; + /** Append a key_value_pair to the builder (to be printed) **/ + simdjson_really_inline void append(simdjson::dom::key_value_pair value); +private: + formatter format{}; +}; + +/** + * @private This is the class that we expect to use with the string_builder + * template. It tries to produce a compact version of the JSON element + * as quickly as possible. 
+ */ +class mini_formatter { +public: + mini_formatter() = default; + /** Add a comma **/ + simdjson_really_inline void comma(); + /** Start an array, prints [ **/ + simdjson_really_inline void start_array(); + /** End an array, prints ] **/ + simdjson_really_inline void end_array(); + /** Start an object, prints { **/ + simdjson_really_inline void start_object(); + /** End an object, prints } **/ + simdjson_really_inline void end_object(); + /** Prints a true **/ + simdjson_really_inline void true_atom(); + /** Prints a false **/ + simdjson_really_inline void false_atom(); + /** Prints a null **/ + simdjson_really_inline void null_atom(); + /** Prints a number **/ + simdjson_really_inline void number(int64_t x); + /** Prints a number **/ + simdjson_really_inline void number(uint64_t x); + /** Prints a number **/ + simdjson_really_inline void number(double x); + /** Prints a key (string + colon) **/ + simdjson_really_inline void key(std::string_view unescaped); + /** Prints a string. The string is escaped as needed. **/ + simdjson_really_inline void string(std::string_view unescaped); + /** Clears out the content. **/ + simdjson_really_inline void clear(); + /** + * Get access to the buffer, it is owned by the instance, but + * the user can make a copy. + **/ + simdjson_really_inline std::string_view str() const; + +private: + // implementation details (subject to change) + /** Prints one character **/ + simdjson_really_inline void one_char(char c); + /** Backing buffer **/ + std::vector buffer{}; // not ideal! +}; + +} // internal + +namespace dom { + +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +} // namespace dom + +/** + * Converts JSON to a string. 
+ * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << to_string(doc) << endl; // prints [1,2,3] + * + */ +template +std::string to_string(T x) { + // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ + // Currently minify and to_string are identical but in the future, they may + // differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} +#if SIMDJSON_EXCEPTIONS +template +std::string to_string(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Minifies a JSON element or document, printing the smallest possible valid JSON. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << minify(doc) << endl; // prints [1,2,3] + * + */ +template +std::string minify(T x) { + return to_string(x); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string minify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + + +} // namespace simdjson + + +#endif +/* end file include/simdjson/dom/serialization.h */ + +// Deprecated API +/* begin file include/simdjson/dom/jsonparser.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_JSONPARSER_H +#define SIMDJSON_DOM_JSONPARSER_H + +/* begin file include/simdjson/dom/parsedjson.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_H +#define SIMDJSON_DOM_PARSEDJSON_H + + +namespace simdjson { + +/** + * @deprecated Use `dom::parser` instead. 
+ */ +using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSEDJSON_H +/* end file include/simdjson/dom/parsedjson.h */ +/* begin file include/simdjson/jsonioutil.h */ +#ifndef SIMDJSON_JSONIOUTIL_H +#define SIMDJSON_JSONIOUTIL_H + + +namespace simdjson { + +#if SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("Use padded_string::load() instead")]] +inline padded_string get_corpus(const char *path) { + return padded_string::load(path); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson + +#endif // SIMDJSON_JSONIOUTIL_H +/* end file include/simdjson/jsonioutil.h */ + +namespace simdjson { + +// +// C API (json_parse and build_parsed_json) declarations +// + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. 
+ // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} + +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { + dom::parser parser; + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +/** @private We do not want to allow implicit conversion from C string to std::string. */ +int json_parse(const char *buf, dom::parser &parser) noexcept = delete; +/** @private We do not want to allow implicit conversion from C string to std::string. */ +dom::parser build_parsed_json(const char *buf) noexcept = delete; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_JSONPARSER_H +/* end file include/simdjson/dom/jsonparser.h */ +/* begin file include/simdjson/dom/parsedjson_iterator.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H + +#include +#include +#include +#include +#include +#include + +/* begin file include/simdjson/internal/jsonformatutils.h */ +#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H +#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H + +#include +#include +#include + +namespace simdjson { +namespace internal { + +class escape_json_string; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); + +class escape_json_string { +public: + escape_json_string(std::string_view _str) noexcept : str{_str} {} + operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } +private: + std::string_view str; + friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +}; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { + for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { + // TODO can this be done once at the beginning, or will it mess up << char? 
+ std::ios::fmtflags f(out.flags()); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); + out.flags(f); + } else { + out << unescaped.str[i]; + } + } + } + return out; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H +/* end file include/simdjson/internal/jsonformatutils.h */ + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { +/** @private **/ +class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { +public: + inline Iterator(const dom::parser &parser) noexcept(false); + inline Iterator(const Iterator &o) noexcept; + inline ~Iterator() noexcept; + + inline Iterator& operator=(const Iterator&) = delete; + + inline bool is_ok() const; + + // useful for debugging purposes + inline size_t get_tape_location() const; + + // useful for debugging purposes + inline size_t get_tape_length() const; + + // returns the current depth (start at 1 with 0 reserved for the fictitious + // root node) + inline size_t get_depth() const; + + // A scope is a series of nodes at the same depth, typically it is either an + // object ({) or an array ([). The root node has type 'r'. + inline uint8_t get_scope_type() const; + + // move forward in document order + inline bool move_forward(); + + // retrieve the character code of what we're looking at: + // [{"slutfn are the possibilities + inline uint8_t get_type() const { + return current_type; // short functions should be inlined! 
+ } + + // get the int64_t value at this node; valid only if get_type is "l" + inline int64_t get_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return static_cast(doc.tape[location + 1]); + } + + // get the value as uint64; valid only if get_type is "u" + inline uint64_t get_unsigned_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return doc.tape[location + 1]; + } + + // get the string value at this node (NULL ended); valid only if get_type is " + // note that tabs, and line endings are escaped in the returned value (see + // print_with_escapes) return value is valid UTF-8, it may contain NULL chars + // within the string: get_string_length determines the true string length. + inline const char *get_string() const { + return reinterpret_cast( + doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); + } + + // return the length of the string in bytes + inline uint32_t get_string_length() const { + uint32_t answer; + std::memcpy(&answer, + reinterpret_cast(doc.string_buf.get() + + (current_val & internal::JSON_VALUE_MASK)), + sizeof(uint32_t)); + return answer; + } + + // get the double value at this node; valid only if + // get_type() is "d" + inline double get_double() const { + if (location + 1 >= tape_length) { + return std::numeric_limits::quiet_NaN(); // default value in + // case of error + } + double answer; + std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); + return answer; + } + + inline bool is_object_or_array() const { return is_object() || is_array(); } + + inline bool is_object() const { return get_type() == '{'; } + + inline bool is_array() const { return get_type() == '['; } + + inline bool is_string() const { return get_type() == '"'; } + + // Returns true if the current type of the node is a signed integer. + // You can get its value with `get_integer()`.
+ inline bool is_integer() const { return get_type() == 'l'; } + + // Returns true if the current type of the node is an unsigned integer. + // You can get its value with `get_unsigned_integer()`. + // + // NOTE: + // Only a large value, which is out of range of a 64-bit signed integer, is + // represented internally as an unsigned node. On the other hand, a typical + // positive integer, such as 1, 42, or 1000000, is stored as a signed node. + // Be aware this function returns false for a signed node. + inline bool is_unsigned_integer() const { return get_type() == 'u'; } + // Returns true if the current type of the node is a double floating-point number. + inline bool is_double() const { return get_type() == 'd'; } + // Returns true if the current type of the node is a number (integer or floating-point). + inline bool is_number() const { + return is_integer() || is_unsigned_integer() || is_double(); + } + // Returns true if the current type of the node is a bool with true value. + inline bool is_true() const { return get_type() == 't'; } + // Returns true if the current type of the node is a bool with false value. + inline bool is_false() const { return get_type() == 'f'; } + // Returns true if the current type of the node is null. + inline bool is_null() const { return get_type() == 'n'; } + // Returns true if the type byte represents an object or an array + static bool is_object_or_array(uint8_t type) { + return ((type == '[') || (type == '{')); + } + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // We seek the key using C's strcmp so if your JSON strings contain + // NULL chars, this would trigger a false positive: if you expect that + // to be the case, take extra precautions.
+ // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key); + + // as above, but case insensitive lookup (strcmpi instead of strcmp) + inline bool move_to_key_insensitive(const char *key); + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // The string we search for can contain NULL values. + // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key, uint32_t length); + + // when at a key location within an object, this moves to the accompanying + // value (located next to it). This is equivalent but much faster than + // calling "next()". + inline void move_to_value(); + + // when at [, go one level deep, and advance to the given index. + // if successful, we are left pointing at the value, + // if not, we are still pointing at the array ([) + inline bool move_to_index(uint32_t index); + + // Moves the iterator to the value corresponding to the json pointer. + // Always search from the root of the document. + // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer follows the rfc6901 standard's syntax: + // https://tools.ietf.org/html/rfc6901 However, the standard says "If a + // referenced member name is not unique in an object, the member that is + // referenced is undefined, and evaluation fails". Here we just return the + // first corresponding value. The length parameter is the length of the + // jsonpointer string ('pointer'). + inline bool move_to(const char *pointer, uint32_t length); + + // Moves the iterator to the value corresponding to the json pointer. 
+ // Always search from the root of the document. + // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer implementation follows the rfc6901 standard's + // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says + // "If a referenced member name is not unique in an object, the member that + // is referenced is undefined, and evaluation fails". Here we just return + // the first corresponding value. + inline bool move_to(const std::string &pointer) { + return move_to(pointer.c_str(), uint32_t(pointer.length())); + } + + private: + // Almost the same as move_to(), except it searches from the current + // position. The pointer's syntax is identical, though that case is not + // handled by the rfc6901 standard. The '/' is still required at the + // beginning. However, contrary to move_to(), the URI Fragment Identifier + // Representation is not supported here. Also, in case of failure, we are + // left pointing at the closest value it could reach. For these reasons it + // is private. It exists because it is used by move_to(). + inline bool relative_move_to(const char *pointer, uint32_t length); + + public: + // throughout return true if we can do the navigation, false + // otherwise + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move forward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { + // and [. At the object ({) or at the array ([), you can issue a "down" to + // visit their content. valid if we're not at the end of a scope (returns + // true). + inline bool next(); + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move backward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true + // when starting at the end of the scope. 
At the object ({) or at the array + // ([), you can issue a "down" to visit their content. + // Performance warning: This function is implemented by starting again + // from the beginning of the scope and scanning forward. You should expect + // it to be relatively slow. + inline bool prev(); + + // Moves back to either the containing array or object (type { or [) from + // within a contained scope. + // Valid unless we are at the first level of the document + inline bool up(); + + // Valid if we're at a [ or { and it starts a non-empty scope; moves us to + // start of that deeper scope if it not empty. Thus, given [true, null, + // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. + inline bool down(); + + // move us to the start of our current scope, + // a scope is a series of nodes at the same level + inline void to_start_scope(); + + inline void rewind() { + while (up()) + ; + } + + + + // print the node we are currently pointing at + inline bool print(std::ostream &os, bool escape_strings = true) const; + + private: + const document &doc; + size_t max_depth{}; + size_t depth{}; + size_t location{}; // our current location on a tape + size_t tape_length{}; + uint8_t current_type{}; + uint64_t current_val{}; + typedef struct { + size_t start_of_scope; + uint8_t scope_type; + } scopeindex_t; + + scopeindex_t *depth_index{}; +}; + +} // namespace simdjson +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +/* end file include/simdjson/dom/parsedjson_iterator.h */ + +// Inline functions +/* begin file include/simdjson/dom/array-inl.h */ +#ifndef SIMDJSON_INLINE_ARRAY_H +#define SIMDJSON_INLINE_ARRAY_H + +// Inline implementations go in here. 
+ +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_really_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_really_inline simdjson_result::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} + +namespace dom { + +// +// array inline implementation +// +simdjson_really_inline array::array() noexcept : tape{} {} +simdjson_really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline array::iterator array::end() const noexcept { + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t array::size() const noexcept { + return tape.scope_count(); +} +inline size_t array::number_of_slots() const noexcept { + return tape.matching_brace_index() - tape.json_index; +} +inline simdjson_result 
array::at_pointer(std::string_view json_pointer) const noexcept { + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + // If there is a /, we're not done yet, call recursively. 
+ if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at(size_t index) const noexcept { + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +// +// array::iterator inline implementation +// +simdjson_really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline array::iterator array::iterator::operator++(int) noexcept { + array::iterator out = *this; + ++*this; + return out; +} +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool array::iterator::operator==(const array::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool array::iterator::operator<(const array::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool array::iterator::operator>(const array::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} + +} // namespace dom + + +} // namespace simdjson + +/* begin file include/simdjson/dom/element-inl.h */ +#ifndef SIMDJSON_INLINE_ELEMENT_H +#define SIMDJSON_INLINE_ELEMENT_H + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_really_inline simdjson_result::simdjson_result() noexcept 
+ : internal::simdjson_result_base() {} +simdjson_really_inline simdjson_result::simdjson_result(dom::element &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +inline simdjson_result simdjson_result::type() const noexcept { + if (error()) { return error(); } + return first.type(); +} + +template +simdjson_really_inline bool simdjson_result::is() const noexcept { + return !error() && first.is(); +} +template +simdjson_really_inline simdjson_result simdjson_result::get() const noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) const noexcept { + if (error()) { return error(); } + return first.get(value); +} + +simdjson_really_inline simdjson_result simdjson_result::get_array() const noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() const noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_c_str() const noexcept { + if (error()) { return error(); } + return first.get_c_str(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() const noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() const noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() const noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_really_inline 
simdjson_result simdjson_result::get_double() const noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() const noexcept { + if (error()) { return error(); } + return first.get_bool(); +} + +simdjson_really_inline bool simdjson_result::is_array() const noexcept { + return !error() && first.is_array(); +} +simdjson_really_inline bool simdjson_result::is_object() const noexcept { + return !error() && first.is_object(); +} +simdjson_really_inline bool simdjson_result::is_string() const noexcept { + return !error() && first.is_string(); +} +simdjson_really_inline bool simdjson_result::is_int64() const noexcept { + return !error() && first.is_int64(); +} +simdjson_really_inline bool simdjson_result::is_uint64() const noexcept { + return !error() && first.is_uint64(); +} +simdjson_really_inline bool simdjson_result::is_double() const noexcept { + return !error() && first.is_double(); +} +simdjson_really_inline bool simdjson_result::is_number() const noexcept { + return !error() && first.is_number(); +} +simdjson_really_inline bool simdjson_result::is_bool() const noexcept { + return !error() && first.is_bool(); +} + +simdjson_really_inline bool simdjson_result::is_null() const noexcept { + return !error() && first.is_null(); +} + +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with 
a slash '/', see RFC6901 ")]] +simdjson_really_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + if (error()) { return error(); } + return first.at(json_pointer); +SIMDJSON_POP_DISABLE_WARNINGS +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +simdjson_really_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +simdjson_really_inline simdjson_result::operator bool() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator const char *() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator std::string_view() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator uint64_t() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator int64_t() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator double() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator dom::array() const noexcept(false) { + return get(); +} +simdjson_really_inline simdjson_result::operator dom::object() const noexcept(false) { + return get(); +} + +simdjson_really_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_really_inline dom::array::iterator simdjson_result::end() 
const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// element inline implementation +// +simdjson_really_inline element::element() noexcept : tape{} {} +simdjson_really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } + +inline element_type element::type() const noexcept { + auto tape_type = tape.tape_ref_type(); + return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast(tape_type); +} + +inline simdjson_result element::get_bool() const noexcept { + if(tape.is_true()) { + return true; + } else if(tape.is_false()) { + return false; + } + return INCORRECT_TYPE; +} +inline simdjson_result element::get_c_str() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_c_str(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string_length() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: + return tape.get_string_view(); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_uint64() const noexcept { + if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken + if(tape.is_int64()) { + int64_t result = tape.next_tape_value(); + if (result < 0) { + return NUMBER_OUT_OF_RANGE; + } + return uint64_t(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_int64() const noexcept { + if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken + if(tape.is_uint64()) { + uint64_t result = tape.next_tape_value(); + // Wrapping max in parens to handle Windows issue: 
https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std + if (result > uint64_t((std::numeric_limits::max)())) { + return NUMBER_OUT_OF_RANGE; + } + return static_cast(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_double() const noexcept { + // Performance considerations: + // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight + // comparison. + // 2. Using a switch-case relies on the compiler guessing what kind of code generation + // we want... But the compiler cannot know that we expect the type to be "double" + // most of the time. + // We can expect get to refer to a double type almost all the time. + // It is important to craft the code accordingly so that the compiler can use this + // information. (This could also be solved with profile-guided optimization.) + if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken + if(tape.is_uint64()) { + return double(tape.next_tape_value()); + } else if(tape.is_int64()) { + return double(tape.next_tape_value()); + } + return INCORRECT_TYPE; + } + // this is common: + return tape.next_tape_value(); +} +inline simdjson_result element::get_array() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::START_ARRAY: + return array(tape); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_object() const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape); + default: + return INCORRECT_TYPE; + } +} + +template +simdjson_warn_unused simdjson_really_inline error_code element::get(T &value) const noexcept { + return get().get(value); +} +// An element-specific version prevents recursion with simdjson_result::get(value) +template<> +simdjson_warn_unused simdjson_really_inline error_code element::get(element &value) const noexcept { + value = 
element(tape); + return SUCCESS; +} +template +inline void element::tie(T &value, error_code &error) && noexcept { + error = get(value); +} + +template +simdjson_really_inline bool element::is() const noexcept { + auto result = get(); + return !result.error(); +} + +template<> inline simdjson_result element::get() const noexcept { return get_array(); } +template<> inline simdjson_result element::get() const noexcept { return get_object(); } +template<> inline simdjson_result element::get() const noexcept { return get_c_str(); } +template<> inline simdjson_result element::get() const noexcept { return get_string(); } +template<> inline simdjson_result element::get() const noexcept { return get_int64(); } +template<> inline simdjson_result element::get() const noexcept { return get_uint64(); } +template<> inline simdjson_result element::get() const noexcept { return get_double(); } +template<> inline simdjson_result element::get() const noexcept { return get_bool(); } + +inline bool element::is_array() const noexcept { return is(); } +inline bool element::is_object() const noexcept { return is(); } +inline bool element::is_string() const noexcept { return is(); } +inline bool element::is_int64() const noexcept { return is(); } +inline bool element::is_uint64() const noexcept { return is(); } +inline bool element::is_double() const noexcept { return is(); } +inline bool element::is_bool() const noexcept { return is(); } +inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); } + +inline bool element::is_null() const noexcept { + return tape.is_null_on_tape(); +} + +#if SIMDJSON_EXCEPTIONS + +inline element::operator bool() const noexcept(false) { return get(); } +inline element::operator const char*() const noexcept(false) { return get(); } +inline element::operator std::string_view() const noexcept(false) { return get(); } +inline element::operator uint64_t() const noexcept(false) { return get(); } +inline element::operator 
int64_t() const noexcept(false) { return get(); } +inline element::operator double() const noexcept(false) { return get(); } +inline element::operator array() const noexcept(false) { return get(); } +inline element::operator object() const noexcept(false) { return get(); } + +inline array::iterator element::begin() const noexcept(false) { + return get().begin(); +} +inline array::iterator element::end() const noexcept(false) { + return get().end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result element::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result element::operator[](const char *key) const noexcept { + return at_key(key); +} + +inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_pointer(json_pointer); + case internal::tape_type::START_ARRAY: + return array(tape).at_pointer(json_pointer); + default: { + if(!json_pointer.empty()) { // a non-empty string is invalid on an atom + return INVALID_JSON_POINTER; + } + // an empty string means that we return the current node + dom::element copy(*this); + return simdjson_result(std::move(copy)); + } + } +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +inline simdjson_result element::at(std::string_view json_pointer) const noexcept { + // version 0.4 of simdjson allowed non-compliant pointers + auto std_pointer = (json_pointer.empty() ? 
"" : "/") + std::string(json_pointer.begin(), json_pointer.end()); + return at_pointer(std_pointer); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline simdjson_result element::at(size_t index) const noexcept { + return get().at(index); +} +inline simdjson_result element::at_key(std::string_view key) const noexcept { + return get().at_key(key); +} +inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { + return get().at_key_case_insensitive(key); +} + +inline bool element::dump_raw_tape(std::ostream &out) const noexcept { + return tape.doc->dump_raw_tape(out); +} + + +inline std::ostream& operator<<(std::ostream& out, element_type type) { + switch (type) { + case element_type::ARRAY: + return out << "array"; + case element_type::OBJECT: + return out << "object"; + case element_type::INT64: + return out << "int64_t"; + case element_type::UINT64: + return out << "uint64_t"; + case element_type::DOUBLE: + return out << "double"; + case element_type::STRING: + return out << "string"; + case element_type::BOOL: + return out << "bool"; + case element_type::NULL_VALUE: + return out << "null"; + default: + return out << "unexpected content!!!"; // abort() usage is forbidden in the library + } +} + +} // namespace dom + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_ELEMENT_H +/* end file include/simdjson/dom/element-inl.h */ + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_INLINE_ARRAY_H +/* end file include/simdjson/dom/array-inl.h */ +/* begin file include/simdjson/dom/document_stream-inl.h */ +#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H +#define SIMDJSON_INLINE_DOCUMENT_STREAM_H + +#include +#include +#include +namespace simdjson { +namespace dom { + +#ifdef 
SIMDJSON_THREADS_ENABLED +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. 
+ can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} +#endif + +simdjson_really_inline document_stream::document_stream( + dom::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change +#endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_really_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) +#endif +{ +} + +simdjson_really_inline document_stream::~document_stream() noexcept { +#ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); +#endif +} + +simdjson_really_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_really_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_really_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { + // Note that in case of error, we do not yet mark + // the iterator as "finished": this detection is done + // in the operator++ function since it is possible + // to call operator++ repeatedly while omitting + // calls to operator*. + if (stream->error) { return stream->error; } + return stream->parser->doc.root(); +} + +simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. 
+ return *this; +} + +simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->ensure_capacity(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } +#ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread if needed + error = stage1_thread_parser.ensure_capacity(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } +#endif // SIMDJSON_THREADS_ENABLED + next(); +} + +simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { + const char* start = reinterpret_cast(stream->buf) + current_index(); + bool object_or_array = ((*start == '[') || (*start == '{')); + if(object_or_array) { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; + return std::string_view(start, next_doc_index - current_index() + 1); + } else { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; + return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + } +} + + +inline void document_stream::next() noexcept { + // We always exit at once, once 
in an error condition. + if (error) { return; } + + // Load the next document from the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + // If that was the last document in the batch, load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + +#ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } +#else + error = run_stage1(*parser, batch_start); +#endif + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + // Run stage 2 on the first document in the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + } +} +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline 
void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(*parser, stage1_thread_parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +simdjson_really_inline simdjson_result::simdjson_result() noexcept + : simdjson_result_base() { +} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : simdjson_result_base(error) { +} +simdjson_really_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept + : simdjson_result_base(std::forward(value)) { +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_really_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept { + first.error = error(); + return first.begin(); +} +simdjson_really_inline dom::document_stream::iterator simdjson_result::end() 
noexcept { + first.error = error(); + return first.end(); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson +#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H +/* end file include/simdjson/dom/document_stream-inl.h */ +/* begin file include/simdjson/dom/document-inl.h */ +#ifndef SIMDJSON_INLINE_DOCUMENT_H +#define SIMDJSON_INLINE_DOCUMENT_H + +// Inline implementations go in here. + +#include +#include + +namespace simdjson { +namespace dom { + +// +// document inline implementation +// +inline element document::root() const noexcept { + return element(internal::tape_ref(this, 1)); +} +simdjson_warn_unused +inline size_t document::capacity() const noexcept { + return allocated_capacity; +} + +simdjson_warn_unused +inline error_code document::allocate(size_t capacity) noexcept { + if (capacity == 0) { + string_buf.reset(); + tape.reset(); + allocated_capacity = 0; + return SUCCESS; + } + + // a pathological input like "[[[[..." would generate capacity tape elements, so + // need a capacity of at least capacity + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + //where capacity + 1 tape elements are + // generated, see issue https://github.com/simdjson/simdjson/issues/345 + size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); + // a document with only zero-length strings... could have capacity/3 string + // and we would need capacity/3 * 5 bytes on the string buffer + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); + tape.reset(new (std::nothrow) uint64_t[tape_capacity]); + if(!(string_buf && tape)) { + allocated_capacity = 0; + string_buf.reset(); + tape.reset(); + return MEMALLOC; + } + // Technically the allocated_capacity might be larger than capacity + // so the next line is pessimistic. 
+ allocated_capacity = capacity; + return SUCCESS; +} + +inline bool document::dump_raw_tape(std::ostream &os) const noexcept { + uint32_t string_length; + size_t tape_idx = 0; + uint64_t tape_val = tape[tape_idx]; + uint8_t type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type; + tape_idx++; + size_t how_many = 0; + if (type == 'r') { + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); + } else { + // Error: no starting root node? + return false; + } + os << "\t// pointing to " << how_many << " (right after last node)\n"; + uint64_t payload; + for (; tape_idx < how_many; tape_idx++) { + os << tape_idx << " : "; + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + os << internal::escape_json_string(std::string_view( + reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), + string_length + )); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tape_idx + 1 >= how_many) { + return false; + } + os << "integer " << static_cast(tape[++tape_idx]) << "\n"; + break; + case 'u': // we have a long uint + if (tape_idx + 1 >= how_many) { + return false; + } + os << "unsigned integer " << tape[++tape_idx] << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tape_idx + 1 >= how_many) { + return false; + } + double answer; + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< 
"\n"; + break; case '}': // we end an object + os << "}\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; + case ']': // we end an array + os << "]\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case 'r': // we start and end with the root node + // should we be hitting the root node? + return false; + default: + return false; + } + } + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type << "\t// pointing to " << payload + << " (start root)\n"; + return true; +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_INLINE_DOCUMENT_H +/* end file include/simdjson/dom/document-inl.h */ +/* begin file include/simdjson/dom/object-inl.h */ +#ifndef SIMDJSON_INLINE_OBJECT_H +#define SIMDJSON_INLINE_OBJECT_H + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_really_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_really_inline simdjson_result::simdjson_result(dom::object value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +inline dom::object::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::object::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// object inline implementation +// +simdjson_really_inline object::object() noexcept : tape{} {} +simdjson_really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline object::iterator object::begin() const noexcept { + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline object::iterator object::end() const noexcept { + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t object::size() const noexcept { + return tape.scope_count(); +} + +inline simdjson_result object::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result object::operator[](const char *key) const noexcept { + return at_key(key); +} +inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } 
else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = at_key(unescaped); + } else { + child = at_key(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_key(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} +// In case you wonder why we need this, please see +// https://github.com/simdjson/simdjson/issues/323 +// People do seek keys in a case-insensitive manner. 
+inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals_case_insensitive(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} + +// +// object::iterator inline implementation +// +simdjson_really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline const key_value_pair object::iterator::operator*() const noexcept { + return key_value_pair(key(), value()); +} +inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool object::iterator::operator==(const object::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool object::iterator::operator<(const object::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool object::iterator::operator>(const object::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} +inline object::iterator& object::iterator::operator++() noexcept { + tape.json_index++; + tape.json_index = tape.after_element(); + return *this; +} +inline object::iterator object::iterator::operator++(int) noexcept { + object::iterator out = *this; + ++*this; + return out; +} +inline std::string_view object::iterator::key() const noexcept { + return tape.get_string_view(); +} +inline uint32_t object::iterator::key_length() const noexcept { + return tape.get_string_length(); +} +inline const char* object::iterator::key_c_str() const noexcept { + 
return reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); +} +inline element object::iterator::value() const noexcept { + return element(internal::tape_ref(tape.doc, tape.json_index + 1)); +} + +/** + * Design notes: + * Instead of constructing a string_view and then comparing it with a + * user-provided strings, it is probably more performant to have dedicated + * functions taking as a parameter the string we want to compare against + * and return true when they are equal. That avoids the creation of a temporary + * std::string_view. Though it is possible for the compiler to avoid entirely + * any overhead due to string_view, relying too much on compiler magic is + * problematic: compiler magic sometimes fail, and then what do you do? + * Also, enticing users to rely on high-performance function is probably better + * on the long run. + */ + +inline bool object::iterator::key_equals(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // We avoid construction of a temporary string_view instance. + return (memcmp(o.data(), key_c_str(), len) == 0); + } + return false; +} + +inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // See For case-insensitive string comparisons, avoid char-by-char functions + // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ + // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
+ return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); + } + return false; +} +// +// key_value_pair inline implementation +// +inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : + key(_key), value(_value) {} + +} // namespace dom + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_INLINE_OBJECT_H +/* end file include/simdjson/dom/object-inl.h */ +/* begin file include/simdjson/dom/parsedjson_iterator-inl.h */ +#ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H +#define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H + +#include + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { + +// VS2017 reports deprecated warnings when you define a deprecated class's methods. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + +// Because of template weirdness, the actual class definition is inline in the document class +simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { + return location < tape_length; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_location() const { + return location; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_length() const { + return tape_length; +} + +// returns the current depth (start at 1 with 0 reserved for the fictitious root +// node) +size_t dom::parser::Iterator::get_depth() const { + return depth; +} + +// A scope is a series of nodes at the same depth, typically it is either an +// object ({) or an array ([). The root node has type 'r'. 
+uint8_t dom::parser::Iterator::get_scope_type() const { + return depth_index[depth].scope_type; +} + +bool dom::parser::Iterator::move_forward() { + if (location + 1 >= tape_length) { + return false; // we are at the end! + } + + if ((current_type == '[') || (current_type == '{')) { + // We are entering a new scope + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } else if ((current_type == ']') || (current_type == '}')) { + // Leaving a scope. + depth--; + } else if (is_number()) { + // these types use 2 locations on the tape, not just one. + location += 1; + } + + location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +void dom::parser::Iterator::move_to_value() { + // assume that we are on a key, so move by 1. + location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +bool dom::parser::Iterator::move_to_key(const char *key) { + if (down()) { + do { + const bool right_key = (strcmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key_insensitive( + const char *key) { + if (down()) { + do { + const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key(const char *key, + uint32_t length) { + if (down()) { + do { + bool right_key = ((get_string_length() == length) && + (memcmp(get_string(), key, length) == 0)); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_index(uint32_t index) { + if (down()) { + uint32_t i = 0; + for (; i < index; i++) { + if (!next()) { + break; + } + } + if (i == index) { + 
return true; + } + up(); + } + return false; +} + +bool dom::parser::Iterator::prev() { + size_t target_location = location; + to_start_scope(); + size_t npos = location; + if (target_location == npos) { + return false; // we were already at the start + } + size_t oldnpos; + // we have that npos < target_location here + do { + oldnpos = npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + } while (npos < target_location); + location = oldnpos; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::up() { + if (depth == 1) { + return false; // don't allow moving back to root + } + to_start_scope(); + // next we just move to the previous value + depth--; + location -= 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::down() { + if (location + 1 >= tape_length) { + return false; + } + if ((current_type == '[') || (current_type == '{')) { + size_t npos = uint32_t(current_val); + if (npos == location + 2) { + return false; // we have an empty scope + } + depth++; + assert(depth < max_depth); + location = location + 1; + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; + } + return false; +} + +void dom::parser::Iterator::to_start_scope() { + location = depth_index[depth].start_of_scope; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +bool dom::parser::Iterator::next() { + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = location + (is_number() ? 
2 : 1); + } + uint64_t next_val = doc.tape[npos]; + uint8_t next_type = uint8_t(next_val >> 56); + if ((next_type == ']') || (next_type == '}')) { + return false; // we reached the end of the scope + } + location = npos; + current_val = next_val; + current_type = next_type; + return true; +} +dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) + : doc(pj.doc) +{ +#if SIMDJSON_EXCEPTIONS + if (!pj.valid) { throw simdjson_error(pj.error); } +#else + if (!pj.valid) { return; } // abort() usage is forbidden in the library +#endif + + max_depth = pj.max_depth(); + depth_index = new scopeindex_t[max_depth + 1]; + depth_index[0].start_of_scope = location; + current_val = doc.tape[location++]; + current_type = uint8_t(current_val >> 56); + depth_index[0].scope_type = current_type; + tape_length = size_t(current_val & internal::JSON_VALUE_MASK); + if (location < tape_length) { + // If we make it here, then depth_capacity must >=2, but the compiler + // may not know this. + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } +} +dom::parser::Iterator::Iterator( + const dom::parser::Iterator &o) noexcept + : doc(o.doc), + max_depth(o.depth), + depth(o.depth), + location(o.location), + tape_length(o.tape_length), + current_type(o.current_type), + current_val(o.current_val) +{ + depth_index = new scopeindex_t[max_depth+1]; + std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); +} + +dom::parser::Iterator::~Iterator() noexcept { + if (depth_index) { delete[] depth_index; } +} + +bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { + if (!is_ok()) { + return false; + } + switch (current_type) { + case '"': // we have a string + os << '"'; + if (escape_strings) { + os << internal::escape_json_string(std::string_view(get_string(), 
get_string_length())); + } else { + // was: os << get_string();, but given that we can include null chars, we + // have to do something crazier: + std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); + } + os << '"'; + break; + case 'l': // we have a long int + os << get_integer(); + break; + case 'u': + os << get_unsigned_integer(); + break; + case 'd': + os << get_double(); + break; + case 'n': // we have a null + os << "null"; + break; + case 't': // we have a true + os << "true"; + break; + case 'f': // we have a false + os << "false"; + break; + case '{': // we have an object + case '}': // we end an object + case '[': // we start an array + case ']': // we end an array + os << char(current_type); + break; + default: + return false; + } + return true; +} + +bool dom::parser::Iterator::move_to(const char *pointer, + uint32_t length) { + char *new_pointer = nullptr; + if (pointer[0] == '#') { + // Converting fragment representation to string representation + new_pointer = new char[length]; + uint32_t new_length = 0; + for (uint32_t i = 1; i < length; i++) { + if (pointer[i] == '%' && pointer[i + 1] == 'x') { +#if __cpp_exceptions + try { +#endif + int fragment = + std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); + if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { + // escaping the character + new_pointer[new_length] = '\\'; + new_length++; + } + new_pointer[new_length] = char(fragment); + i += 3; +#if __cpp_exceptions + } catch (std::invalid_argument &) { + delete[] new_pointer; + return false; // the fragment is invalid + } +#endif + } else { + new_pointer[new_length] = pointer[i]; + } + new_length++; + } + length = new_length; + pointer = new_pointer; + } + + // saving the current state + size_t depth_s = depth; + size_t location_s = location; + uint8_t current_type_s = current_type; + uint64_t current_val_s = current_val; + + rewind(); // The json pointer is used from the root of the document. 
+ + bool found = relative_move_to(pointer, length); + delete[] new_pointer; + + if (!found) { + // since the pointer has found nothing, we get back to the original + // position. + depth = depth_s; + location = location_s; + current_type = current_type_s; + current_val = current_val_s; + } + + return found; +} + +bool dom::parser::Iterator::relative_move_to(const char *pointer, + uint32_t length) { + if (length == 0) { + // returns the whole document + return true; + } + + if (pointer[0] != '/') { + // '/' must be the first character + return false; + } + + // finding the key in an object or the index in an array + std::string key_or_index; + uint32_t offset = 1; + + // checking for the "-" case + if (is_array() && pointer[1] == '-') { + if (length != 2) { + // the pointer must be exactly "/-" + // there can't be anything more after '-' as an index + return false; + } + key_or_index = '-'; + offset = length; // will skip the loop coming right after + } + + // We either transform the first reference token to a valid json key + // or we make sure it is a valid index in an array. + for (; offset < length; offset++) { + if (pointer[offset] == '/') { + // beginning of the next key or index + break; + } + if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { + // the index of an array must be an integer + // we also make sure std::stoi won't discard whitespaces later + return false; + } + if (pointer[offset] == '~') { + // "~1" represents "/" + if (pointer[offset + 1] == '1') { + key_or_index += '/'; + offset++; + continue; + } + // "~0" represents "~" + if (pointer[offset + 1] == '0') { + key_or_index += '~'; + offset++; + continue; + } + } + if (pointer[offset] == '\\') { + if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || + (pointer[offset + 1] <= 0x1F)) { + key_or_index += pointer[offset + 1]; + offset++; + continue; + } + return false; // invalid escaped character + } + if (pointer[offset] == '\"') { + // unescaped quote character. 
this is an invalid case. + // lets do nothing and assume most pointers will be valid. + // it won't find any corresponding json key anyway. + // return false; + } + key_or_index += pointer[offset]; + } + + bool found = false; + if (is_object()) { + if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { + found = relative_move_to(pointer + offset, length - offset); + } + } else if (is_array()) { + if (key_or_index == "-") { // handling "-" case first + if (down()) { + while (next()) + ; // moving to the end of the array + // moving to the nonexistent value right after... + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = + location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + location = npos; + current_val = doc.tape[npos]; + current_type = uint8_t(current_val >> 56); + return true; // how could it fail ? + } + } else { // regular numeric index + // The index can't have a leading '0' + if (key_or_index[0] == '0' && key_or_index.length() > 1) { + return false; + } + // it cannot be empty + if (key_or_index.length() == 0) { + return false; + } + // we already checked the index contains only valid digits + uint32_t index = std::stoi(key_or_index); + if (move_to_index(index)) { + found = relative_move_to(pointer + offset, length - offset); + } + } + } + + return found; +} + +SIMDJSON_POP_DISABLE_WARNINGS +} // namespace simdjson + +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + +#endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H +/* end file include/simdjson/dom/parsedjson_iterator-inl.h */ +/* begin file include/simdjson/dom/parser-inl.h */ +#ifndef SIMDJSON_INLINE_PARSER_H +#define SIMDJSON_INLINE_PARSER_H + +#include +#include + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +simdjson_really_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} 
+simdjson_really_inline parser::parser(parser &&other) noexcept = default; +simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } + +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? doc.dump_raw_tape(os) : false; +} + +inline simdjson_result parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + if(std::fseek(fp, 0, SEEK_END) < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + __int64 len = _ftelli64(fp); + if(len == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result parser::load(const std::string &path) & noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return parse(loaded_bytes.get(), len, false); +} + +inline simdjson_result 
parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); +} + +inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + // Important: we need to ensure that document has enough capacity. + // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! + error_code _error = ensure_capacity(provided_doc, len); + if (_error) { return _error; } + if (realloc_if_needed) { + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); + } + _error = implementation->parse(realloc_if_needed ? 
reinterpret_cast(loaded_bytes.get()): buf, len, provided_doc); + + if (_error) { return _error; } + + return provided_doc.root(); +} + +simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), false); +} + + +inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(doc, buf, len, realloc_if_needed); +} + +simdjson_really_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse(reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_really_inline simdjson_result parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_really_inline simdjson_result parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} + +inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::parse_many(const std::string &s, size_t 
batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +simdjson_really_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_really_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_really_inline size_t parser::max_depth() const noexcept { + return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +simdjson_warn_unused +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation if needed + // + error_code err; + if (implementation) { + err = implementation->allocate(capacity, max_depth); + } else { + err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + return SUCCESS; +} + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_warn_unused +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + return ensure_capacity(doc, desired_capacity); +} + + +inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { + // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. + // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. + if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } + // If we don't have enough capacity, (try to) automatically bump it. + // If the document needs allocation, do it too. 
+ // Both in one if statement to minimize unlikely branching. + // + // Note: we must make sure that this function is called if capacity() == 0. We do so because we + // ensure that desired_capacity > 0. + if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; + error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; + if(err1 != SUCCESS) { return error = err1; } + if(err2 != SUCCESS) { return error = err2; } + } + return SUCCESS; +} + +simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_INLINE_PARSER_H +/* end file include/simdjson/dom/parser-inl.h */ +/* begin file include/simdjson/internal/tape_ref-inl.h */ +#ifndef SIMDJSON_INLINE_TAPE_REF_H +#define SIMDJSON_INLINE_TAPE_REF_H + +#include + +namespace simdjson { +namespace internal { + +// +// tape_ref inline implementation +// +simdjson_really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + + +simdjson_really_inline bool tape_ref::is_document_root() const noexcept { + return json_index == 1; // should we ever change the structure of the tape, this should get updated. +} + +// Some value types have a specific on-tape word value. It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. 
+ +simdjson_really_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +simdjson_really_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +simdjson_really_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +simdjson_really_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +simdjson_really_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +simdjson_really_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +simdjson_really_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast(doc->tape[json_index] >> 56); +} +simdjson_really_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +simdjson_really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +simdjson_really_inline uint32_t internal::tape_ref::scope_count() const noexcept { + return uint32_t((doc->tape[json_index] >> 32) & 
internal::JSON_COUNT_MASK); +} + +template +simdjson_really_inline T tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. + T x; + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +simdjson_really_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INLINE_TAPE_REF_H +/* end file include/simdjson/internal/tape_ref-inl.h */ +/* begin file include/simdjson/dom/serialization-inl.h */ + +#ifndef SIMDJSON_SERIALIZATION_INL_H +#define SIMDJSON_SERIALIZATION_INL_H + + +#include +#include + +namespace simdjson { +namespace dom { +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + simdjson::internal::string_builder<> sb; + sb.append(doc.root()); + std::string_view answer = sb.str(); + os << answer; + return true; +} +} +/*** + * Number utility functions + **/ + + +namespace { +/**@private + * Escape sequence like \b or \u0001 + * We expect that most compilers will use 8 bytes for this data structure. 
+ **/ +struct escape_sequence { + uint8_t length; + const char string[7]; // technically, we only ever need 6 characters, we pad to 8 +}; +/**@private + * This converts a signed integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 20 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +char *fast_itoa(char *output, int64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + uint64_t value_positive; + // In general, negating a signed integer is unsafe. + if(value < 0) { + *output++ = '-'; + // Doing value_positive = -value; while avoiding + // undefined behavior warnings. + // It assumes two complement's which is universal at this + // point in time. + std::memcpy(&value_positive, &value, sizeof(value)); + value_positive = (~value_positive) + 1; // this is a negation + } else { + value_positive = value; + } + // We work solely with value_positive. It *might* be easier + // for an optimizing compiler to deal with an unsigned variable + // as far as performance goes. + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value_positive >= 10) { + *write_pointer-- = char('0' + (value_positive % 10)); + value_positive /= 10; + } + *write_pointer = char('0' + value_positive); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} +/**@private + * This converts an unsigned integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 19 characters.) 
+ * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +char *fast_itoa(char *output, uint64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value >= 10) { + *write_pointer-- = char('0' + (value % 10)); + value /= 10; + }; + *write_pointer = char('0' + value); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} +} // anonymous namespace +namespace internal { + +/*** + * Minifier/formatter code. + **/ + +simdjson_really_inline void mini_formatter::number(uint64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +simdjson_really_inline void mini_formatter::number(int64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +simdjson_really_inline void mini_formatter::number(double x) { + char number_buffer[24]; + // Currently, passing the nullptr to the second argument is + // safe because our implementation does not check the second + // argument. 
+ char *newp = internal::to_chars(number_buffer, nullptr, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +simdjson_really_inline void mini_formatter::start_array() { one_char('['); } +simdjson_really_inline void mini_formatter::end_array() { one_char(']'); } +simdjson_really_inline void mini_formatter::start_object() { one_char('{'); } +simdjson_really_inline void mini_formatter::end_object() { one_char('}'); } +simdjson_really_inline void mini_formatter::comma() { one_char(','); } + + +simdjson_really_inline void mini_formatter::true_atom() { + const char * s = "true"; + buffer.insert(buffer.end(), s, s + 4); +} +simdjson_really_inline void mini_formatter::false_atom() { + const char * s = "false"; + buffer.insert(buffer.end(), s, s + 5); +} +simdjson_really_inline void mini_formatter::null_atom() { + const char * s = "null"; + buffer.insert(buffer.end(), s, s + 4); +} +simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } +simdjson_really_inline void mini_formatter::key(std::string_view unescaped) { + string(unescaped); + one_char(':'); +} +simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { + one_char('\"'); + size_t i = 0; + // Fast path for the case where we have no control character, no ", and no backslash. + // This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. 
+ constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for(;i + 8 <= unescaped.length(); i += 8) { + // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] + ) { break; } + } + for(;i < unescaped.length(); i++) { + if(needs_escaping[uint8_t(unescaped[i])]) { break; } + } + // The following is also possible and omits a 256-byte table, but it is slower: + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} + + // At least for long strings, the following should be fast. We could + // do better by integrating the checks and the insertion. + buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + // We caught a control character if we enter this loop (slow). 
+ // Note that we are do not restart from the beginning, but rather we continue + // from the point where we encountered something that requires escaping. + for (; i < unescaped.length(); i++) { + switch (unescaped[i]) { + case '\"': + { + const char * s = "\\\""; + buffer.insert(buffer.end(), s, s + 2); + } + break; + case '\\': + { + const char * s = "\\\\"; + buffer.insert(buffer.end(), s, s + 2); + } + break; + default: + if (uint8_t(unescaped[i]) <= 0x1F) { + // If packed, this uses 8 * 32 bytes. + // Note that we expect most compilers to embed this code in the data + // section. + constexpr static escape_sequence escaped[32] = { + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + auto u = escaped[uint8_t(unescaped[i])]; + buffer.insert(buffer.end(), u.string, u.string + u.length); + } else { + one_char(unescaped[i]); + } + } // switch + } // for + one_char('\"'); +} + +inline void mini_formatter::clear() { + buffer.clear(); +} + +simdjson_really_inline std::string_view mini_formatter::str() const { + return std::string_view(buffer.data(), buffer.size()); +} + + +/*** + * String building code. 
+ **/ + +template +inline void string_builder::append(simdjson::dom::element value) { + // using tape_type = simdjson::internal::tape_type; + size_t depth = 0; + constexpr size_t MAX_DEPTH = 16; + bool is_object[MAX_DEPTH]; + is_object[0] = false; + bool after_value = false; + + internal::tape_ref iter(value.tape); + do { + // print commas after each value + if (after_value) { + format.comma(); + } + // If we are in an object, print the next key and :, and skip to the next + // value. + if (is_object[depth]) { + format.key(iter.get_string_view()); + iter.json_index++; + } + switch (iter.tape_ref_type()) { + + // Arrays + case tape_type::START_ARRAY: { + // If we're too deep, we need to recurse to go deeper. + depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::array(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] + depth--; + break; + } + + // Output start [ + format.start_array(); + iter.json_index++; + + // Handle empty [] (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + depth--; + break; + } + + is_object[depth] = false; + after_value = false; + continue; + } + + // Objects + case tape_type::START_OBJECT: { + // If we're too deep, we need to recurse to go deeper. 
+ depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::object(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the } + depth--; + break; + } + + // Output start { + format.start_object(); + iter.json_index++; + + // Handle empty {} (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_OBJECT) { + format.end_object(); + depth--; + break; + } + + is_object[depth] = true; + after_value = false; + continue; + } + + // Scalars + case tape_type::STRING: + format.string(iter.get_string_view()); + break; + case tape_type::INT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::UINT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::DOUBLE: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::TRUE_VALUE: + format.true_atom(); + break; + case tape_type::FALSE_VALUE: + format.false_atom(); + break; + case tape_type::NULL_VALUE: + format.null_atom(); + break; + + // These are impossible + case tape_type::END_ARRAY: + case tape_type::END_OBJECT: + case tape_type::ROOT: + SIMDJSON_UNREACHABLE(); + } + iter.json_index++; + after_value = true; + + // Handle multiple ends in a row + while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || + iter.tape_ref_type() == tape_type::END_OBJECT)) { + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + } else { + format.end_object(); + } + depth--; + iter.json_index++; + } + + // Stop when we're at depth 0 + } while (depth != 0); +} + +template +inline void string_builder::append(simdjson::dom::object value) { + format.start_object(); + auto pair = value.begin(); + auto end = value.end(); + if (pair != end) { + 
append(*pair); + for (++pair; pair != end; ++pair) { + format.comma(); + append(*pair); + } + } + format.end_object(); +} + +template +inline void string_builder::append(simdjson::dom::array value) { + format.start_array(); + auto iter = value.begin(); + auto end = value.end(); + if (iter != end) { + append(*iter); + for (++iter; iter != end; ++iter) { + format.comma(); + append(*iter); + } + } + format.end_array(); +} + +template +simdjson_really_inline void string_builder::append(simdjson::dom::key_value_pair kv) { + format.key(kv.key); + append(kv.value); +} + +template +simdjson_really_inline void string_builder::clear() { + format.clear(); +} + +template +simdjson_really_inline std::string_view string_builder::str() const { + return format.str(); +} + + +} // namespace internal +} // namespace simdjson + +#endif +/* end file include/simdjson/dom/serialization-inl.h */ + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_DOM_H +/* end file include/simdjson/dom.h */ +/* begin file include/simdjson/builtin.h */ +#ifndef SIMDJSON_BUILTIN_H +#define SIMDJSON_BUILTIN_H + +/* begin file include/simdjson/implementations.h */ +#ifndef SIMDJSON_IMPLEMENTATIONS_H +#define SIMDJSON_IMPLEMENTATIONS_H + +/* begin file include/simdjson/implementation-base.h */ +#ifndef SIMDJSON_IMPLEMENTATION_BASE_H +#define SIMDJSON_IMPLEMENTATION_BASE_H + +/** + * @file + * + * Includes common stuff needed for implementations. + */ + + +// Implementation-internal files (must be included before the implementations themselves, to keep +// amalgamation working--otherwise, the first time a file is included, it might be put inside the +// #ifdef SIMDJSON_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't +// compile unless that implementation is turned on). 
+/* begin file include/simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file include/simdjson/internal/jsoncharutils_tables.h */ +/* begin file include/simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. 
+ * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file include/simdjson/internal/numberparsing_tables.h */ +/* begin file include/simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file include/simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_IMPLEMENTATION_BASE_H +/* end file include/simdjson/implementation-base.h */ + +// +// First, figure out which implementations can be run. 
Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. 
+#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#endif + +// Default Westmere to on if this is x86-64. Note that the macro SIMDJSON_REQUIRES_HASWELL appears unused. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#define SIMDJSON_IMPLEMENTATION_WESTMERE (SIMDJSON_IS_X86_64 && !SIMDJSON_REQUIRES_HASWELL) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE && !SIMDJSON_CAN_ALWAYS_RUN_PPC64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +// Implementations +/* begin file include/simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + + +#if SIMDJSON_IMPLEMENTATION_ARM64 + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + + +namespace simdjson { +namespace arm64 { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file include/simdjson/arm64/implementation.h */ + +/* begin file include/simdjson/arm64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "arm64" +// #define SIMDJSON_IMPLEMENTATION arm64 +/* end file include/simdjson/arm64/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file include/simdjson/arm64/intrinsics.h */ +/* begin file include/simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. 
+ */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greater than or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file include/simdjson/arm64/bitmanipulation.h */ +/* begin file include/simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow.
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file include/simdjson/arm64/bitmask.h */ +/* begin file include/simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +#include + + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. + * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. 
+ */ +simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so ends up generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so ends up generating worse code.
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_really_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_really_inline operator const uint8x16_t&() const { return this->value; } + simdjson_really_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_really_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_really_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_really_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_really_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + 
simdjson_really_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_really_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_really_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_really_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_really_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
+ simdjson_really_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_really_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_really_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_really_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_really_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_really_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) 
: simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_really_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_really_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_really_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_really_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_really_inline simd8 operator<(const simd8 
other) const { return vcltq_u8(*this, other); } + simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_really_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_really_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_really_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_really_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult.
+ template + simdjson_really_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
+ template + simdjson_really_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_really_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_really_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_really_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_really_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_really_inline 
operator const int8x16_t&() const { return this->value; } + simdjson_really_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_really_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_really_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_really_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. 
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_really_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_really_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_really_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, 
replace15 + )); + } + + template + simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_really_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); 
+ return offsets >> 56; + } + + simdjson_really_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file include/simdjson/arm64/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// 
returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. 
+// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + 
+simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/arm64/stringparsing.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_H +#define SIMDJSON_ARM64_STRINGPARSING_H + + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_really_inline bool has_backslash() { return bs_bits != 0; } + simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; 
therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. 
+ if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then + * dst needs to have four free bytes. + */ +simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { + if (*(src++) != '"') { return STRING_ERROR; } + auto end = stringparsing::parse_string(src, current_string_buf_loc); + if (!end) { return STRING_ERROR; } + s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); + current_string_buf_loc = end; + return SUCCESS; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/stringparsing.h */ + +#endif // SIMDJSON_ARM64_STRINGPARSING_H +/* end file include/simdjson/arm64/stringparsing.h */ +/* begin file include/simdjson/arm64/numberparsing.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_H +#define SIMDJSON_ARM64_NUMBERPARSING_H + +namespace simdjson { +namespace arm64 { +namespace { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace arm64 { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + 
unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. 
How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. 
Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). 
+ int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. 
We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+  // Performance note: This check is only needed when digit_count == longest_digit_count but it is
+  // so cheap that we might as well always make it.
+  if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
+  return negative ? (~i+1) : i;
+}
+
+// Parse the JSON number starting at src as a double.
+// Returns INCORRECT_TYPE when there are no integer digits and NUMBER_ERROR when the
+// number is malformed or cannot be converted. The input is read until a non-digit is
+// found, so the caller must guarantee a terminating character after the number.
+simdjson_unused simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
+  //
+  // Check for minus sign
+  //
+  bool negative = (*src == '-');
+  src += negative;
+
+  //
+  // Parse the integer part.
+  //
+  uint64_t i = 0;
+  const uint8_t *p = src;
+  p += parse_digit(*p, i);
+  bool leading_zero = (i == 0);
+  while (parse_digit(*p, i)) { p++; }
+  // no integer digits, or 0123 (zero must be solo)
+  if ( p == src ) { return INCORRECT_TYPE; }
+  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
+
+  //
+  // Parse the decimal part.
+  //
+  int64_t exponent = 0;
+  bool overflow;
+  if (simdjson_likely(*p == '.')) {
+    p++;
+    const uint8_t *start_decimal_digits = p;
+    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
+    p++;
+    while (parse_digit(*p, i)) { p++; }
+    exponent = -(p - start_decimal_digits);
+
+    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
+    overflow = p-src-1 > 19;
+    if (simdjson_unlikely(overflow && leading_zero)) {
+      // Skip leading 0.00000 and see if it still overflows
+      const uint8_t *start_digits = src + 2;
+      while (*start_digits == '0') { start_digits++; }
+      // Count the significant digits that remain (not the zeros that were skipped):
+      // only those were accumulated into i and can have wrapped it.
+      overflow = p-start_digits > 19;
+    }
+  } else {
+    overflow = p-src > 19;
+  }
+
+  //
+  // Parse the exponent
+  //
+  if (*p == 'e' || *p == 'E') {
+    p++;
+    bool exp_neg = *p == '-';
+    p += exp_neg || *p == '+';
+
+    uint64_t exp = 0;
+    const uint8_t *start_exp_digits = p;
+    while (parse_digit(*p, exp)) { p++; }
+    // no exp digits, or 20+ exp digits
+    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
+
+    exponent += exp_neg ? 0-exp : exp;
+  }
+
+  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
+
+  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
+
+  //
+  // Assemble (or slow-parse) the float
+  //
+  double d;
+  if (simdjson_likely(!overflow)) {
+    if (compute_float_64(exponent, i, negative, d)) { return d; }
+  }
+  if (!parse_float_fallback(src-negative, &d)) {
+    return NUMBER_ERROR;
+  }
+  return d;
+}
+
+simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept {
+  return (*src == '-');
+}
+
+// Returns true when the number at src consists only of an optional minus sign and
+// digits followed by a structural or whitespace character; NUMBER_ERROR when there
+// are no digits at all.
+simdjson_unused simdjson_really_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
+  bool negative = (*src == '-');
+  src += negative;
+  const uint8_t *p = src;
+  while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
+  if ( p == src ) { return NUMBER_ERROR; }
+  if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; }
+  return false;
+}
+
+// Classifies the number at src as signed integer, unsigned integer or floating point
+// without converting it; NUMBER_ERROR when there are no digits at all.
+simdjson_unused simdjson_really_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept {
+  bool negative = (*src == '-');
+  src += negative;
+  const uint8_t *p = src;
+  while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
+  if ( p == src ) { return NUMBER_ERROR; }
+  if (jsoncharutils::is_structural_or_whitespace(*p)) {
+    // We have an integer.
+    // If the number is negative and valid, it must be a signed integer.
+    if(negative) { return ondemand::number_type::signed_integer; }
+    // We want values larger or equal to 9223372036854775808 to be unsigned
+    // integers, and the other values to be signed integers.
+    int digit_count = int(p - src);
+    if(digit_count >= 19) {
+      const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808");
+      if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) {
+        return ondemand::number_type::unsigned_integer;
+      }
+    }
+    return ondemand::number_type::signed_integer;
+  }
+  // Hopefully, we have 'e' or 'E' or '.'.
+  return ondemand::number_type::floating_point_number;
+}
+
+// Never read at src_end or beyond
+simdjson_unused simdjson_really_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
+  if(src == src_end) { return NUMBER_ERROR; }
+  //
+  // Check for minus sign
+  //
+  bool negative = (*src == '-');
+  src += negative;
+
+  //
+  // Parse the integer part.
+  //
+  uint64_t i = 0;
+  const uint8_t *p = src;
+  if(p == src_end) { return NUMBER_ERROR; }
+  p += parse_digit(*p, i);
+  bool leading_zero = (i == 0);
+  while ((p != src_end) && parse_digit(*p, i)) { p++; }
+  // no integer digits, or 0123 (zero must be solo)
+  if ( p == src ) { return INCORRECT_TYPE; }
+  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
+
+  //
+  // Parse the decimal part.
+  //
+  int64_t exponent = 0;
+  bool overflow;
+  if (simdjson_likely((p != src_end) && (*p == '.'))) {
+    p++;
+    const uint8_t *start_decimal_digits = p;
+    if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
+    p++;
+    while ((p != src_end) && parse_digit(*p, i)) { p++; }
+    exponent = -(p - start_decimal_digits);
+
+    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
+    overflow = p-src-1 > 19;
+    if (simdjson_unlikely(overflow && leading_zero)) {
+      // Skip leading 0.00000 and see if it still overflows
+      const uint8_t *start_digits = src + 2;
+      // Stop at p: when every decimal digit is '0', p may equal src_end and must not be read.
+      while ((start_digits != p) && (*start_digits == '0')) { start_digits++; }
+      // Count the significant digits that remain (not the zeros that were skipped):
+      // only those were accumulated into i and can have wrapped it.
+      overflow = p-start_digits > 19;
+    }
+  } else {
+    overflow = p-src > 19;
+  }
+
+  //
+  // Parse the exponent
+  //
+  if ((p != src_end) && (*p == 'e' || *p == 'E')) {
+    p++;
+    if(p == src_end) { return NUMBER_ERROR; }
+    bool exp_neg = *p == '-';
+    p += exp_neg || *p == '+';
+
+    uint64_t exp = 0;
+    const uint8_t *start_exp_digits = p;
+    while ((p != src_end) && parse_digit(*p, exp)) { p++; }
+    // no exp digits, or 20+ exp digits
+    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
+
+    exponent += exp_neg ? 0-exp : exp;
+  }
+
+  if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; }
+
+  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
+
+  //
+  // Assemble (or slow-parse) the float
+  //
+  double d;
+  if (simdjson_likely(!overflow)) {
+    if (compute_float_64(exponent, i, negative, d)) { return d; }
+  }
+  if (!parse_float_fallback(src-negative, src_end, &d)) {
+    return NUMBER_ERROR;
+  }
+  return d;
+}
+
+// Same as parse_double(src), for a number wrapped in double quotes: src points at the
+// opening quote and the number must be immediately followed by the closing quote.
+simdjson_unused simdjson_really_inline simdjson_result<double> parse_double_in_string(const uint8_t * src) noexcept {
+  //
+  // Check for minus sign
+  //
+  bool negative = (*(src + 1) == '-');
+  src += negative + 1;
+
+  //
+  // Parse the integer part.
+  //
+  uint64_t i = 0;
+  const uint8_t *p = src;
+  p += parse_digit(*p, i);
+  bool leading_zero = (i == 0);
+  while (parse_digit(*p, i)) { p++; }
+  // no integer digits, or 0123 (zero must be solo)
+  if ( p == src ) { return INCORRECT_TYPE; }
+  if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
+
+  //
+  // Parse the decimal part.
+  //
+  int64_t exponent = 0;
+  bool overflow;
+  if (simdjson_likely(*p == '.')) {
+    p++;
+    const uint8_t *start_decimal_digits = p;
+    if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits
+    p++;
+    while (parse_digit(*p, i)) { p++; }
+    exponent = -(p - start_decimal_digits);
+
+    // Overflow check. More than 19 digits (minus the decimal) may be overflow.
+    overflow = p-src-1 > 19;
+    if (simdjson_unlikely(overflow && leading_zero)) {
+      // Skip leading 0.00000 and see if it still overflows
+      const uint8_t *start_digits = src + 2;
+      while (*start_digits == '0') { start_digits++; }
+      // Count the significant digits that remain (not the zeros that were skipped):
+      // only those were accumulated into i and can have wrapped it.
+      overflow = p-start_digits > 19;
+    }
+  } else {
+    overflow = p-src > 19;
+  }
+
+  //
+  // Parse the exponent
+  //
+  if (*p == 'e' || *p == 'E') {
+    p++;
+    bool exp_neg = *p == '-';
+    p += exp_neg || *p == '+';
+
+    uint64_t exp = 0;
+    const uint8_t *start_exp_digits = p;
+    while (parse_digit(*p, exp)) { p++; }
+    // no exp digits, or 20+ exp digits
+    if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
+
+    exponent += exp_neg ? 0-exp : exp;
+  }
+
+  if (*p != '"') { return NUMBER_ERROR; }
+
+  overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
+
+  //
+  // Assemble (or slow-parse) the float
+  //
+  double d;
+  if (simdjson_likely(!overflow)) {
+    if (compute_float_64(exponent, i, negative, d)) { return d; }
+  }
+  if (!parse_float_fallback(src-negative, &d)) {
+    return NUMBER_ERROR;
+  }
+  return d;
+}
+} //namespace {}
+#endif // SIMDJSON_SKIPNUMBERPARSING
+
+} // namespace numberparsing
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdjson
+/* end file include/simdjson/generic/numberparsing.h */
+
+#endif // SIMDJSON_ARM64_NUMBERPARSING_H
+/* end file include/simdjson/arm64/numberparsing.h */
+/* begin file include/simdjson/arm64/end.h */
+/* end file include/simdjson/arm64/end.h */
+
+#endif // SIMDJSON_IMPLEMENTATION_ARM64
+
+#endif // SIMDJSON_ARM64_H
+/* end file include/simdjson/arm64.h */
+/* begin file include/simdjson/fallback.h */
+#ifndef SIMDJSON_FALLBACK_H
+#define SIMDJSON_FALLBACK_H
+
+
+#if SIMDJSON_IMPLEMENTATION_FALLBACK
+
+namespace simdjson {
+/**
+ * Fallback implementation (runs on any machine).
+ */
+namespace fallback {
+} // namespace fallback
+} // namespace simdjson
+
+/* begin file include/simdjson/fallback/implementation.h */
+#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H
+#define SIMDJSON_FALLBACK_IMPLEMENTATION_H
+
+
+namespace simdjson {
+namespace fallback {
+
+namespace {
+using namespace simdjson;
+using namespace simdjson::dom;
+}
+
+/**
+ * The "fallback" implementation descriptor. It registers itself with the base class
+ * under the name "fallback" with no required instruction sets (0) and declares the
+ * three entry points overridden from simdjson::implementation.
+ */
+class implementation final : public simdjson::implementation {
+public:
+  simdjson_really_inline implementation() : simdjson::implementation(
+      "fallback",
+      "Generic fallback implementation",
+      0
+  ) {}
+  simdjson_warn_unused error_code create_dom_parser_implementation(
+    size_t capacity,
+    size_t max_length,
+    std::unique_ptr<internal::dom_parser_implementation>& dst
+  ) const noexcept final;
+  simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final;
+  simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
+};
+
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H
+/* end file include/simdjson/fallback/implementation.h */
+
+/* begin file include/simdjson/fallback/begin.h */
+// redefining SIMDJSON_IMPLEMENTATION to "fallback"
+// #define SIMDJSON_IMPLEMENTATION fallback
+/* end file include/simdjson/fallback/begin.h */
+
+// Declarations
+/* begin file include/simdjson/generic/dom_parser_implementation.h */
+
+namespace simdjson {
+namespace fallback {
+
+// expectation: sizeof(open_container) = 64/8.
+struct open_container {
+  uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
+  uint32_t count; // how many elements in the scope
+}; // struct open_container
+
+static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
+
+class dom_parser_implementation final : public internal::dom_parser_implementation {
+public:
+  /** Tape location of each open { or [ */
+  std::unique_ptr<open_container[]> open_containers{};
+  /** Whether each open container is a [ or { */
+  std::unique_ptr<bool[]> is_array{};
+  /** Buffer passed to stage 1 */
+  const uint8_t *buf{};
+  /** Length passed to stage 1 */
+  size_t len{0};
+  /** Document passed to stage 2 */
+  dom::document *doc{};
+
+  inline dom_parser_implementation() noexcept;
+  inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
+  inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
+  dom_parser_implementation(const dom_parser_implementation &) = delete;
+  dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
+
+  simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final;
+  simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
+  inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
+  inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
+private:
+  simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
+
+};
+
+} // namespace fallback
+} // namespace simdjson
+
+namespace simdjson {
+namespace fallback {
+
+inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
+inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
+inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
+
+// Leaving these here so they can be inlined if so desired
+// Allocates the stage 1 structural index buffer for documents of up to `capacity` bytes.
+// Returns CAPACITY when the request exceeds SIMDJSON_MAXSIZE_BYTES and MEMALLOC (with
+// the capacity reset to 0) when the allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
+  if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
+  // Stage 1 index output
+  size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
+  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
+  if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
+  structural_indexes[0] = 0;
+  n_structural_indexes = 0;
+
+  _capacity = capacity;
+  return SUCCESS;
+}
+
+// Allocates the stage 2 stacks for nesting of up to `max_depth` containers.
+// Returns MEMALLOC (with the max depth reset to 0) when either allocation fails.
+inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
+  // Stage 2 stacks
+  open_containers.reset(new (std::nothrow) open_container[max_depth]);
+  is_array.reset(new (std::nothrow) bool[max_depth]);
+  if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
+
+  _max_depth = max_depth;
+  return SUCCESS;
+}
+
+} // namespace fallback
+} // namespace simdjson
+/* end file include/simdjson/generic/dom_parser_implementation.h */
+/* begin file include/simdjson/fallback/bitmanipulation.h */
+#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H
+#define SIMDJSON_FALLBACK_BITMANIPULATION_H
+
+#include <limits>
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
+static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
+  unsigned long x0 = (unsigned long)x, top, bottom;
+  _BitScanForward(&top, (unsigned long)(x >> 32));
+  _BitScanForward(&bottom, x0);
+  *ret = x0 ? bottom : 32 + top;
+  return x != 0;
+}
+static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
+  unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
+  _BitScanReverse(&top, x1);
+  _BitScanReverse(&bottom, (unsigned long)x);
+  *ret = x1 ? top + 32 : bottom;
+  return x != 0;
+}
+#endif
+
+/* result might be undefined when input_num is zero */
+simdjson_really_inline int leading_zeroes(uint64_t input_num) {
+#ifdef _MSC_VER
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+  if (_BitScanReverse64(&leading_zero, input_num))
+    return (int)(63 - leading_zero);
+  else
+    return 64;
+#else
+  return __builtin_clzll(input_num);
+#endif// _MSC_VER
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
+/* end file include/simdjson/fallback/bitmanipulation.h */
+/* begin file include/simdjson/generic/jsoncharutils.h */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+namespace jsoncharutils {
+
+// return non-zero if not a structural or whitespace char
+// zero otherwise
+simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace_negated[c];
+}
+
+simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) {
+  return internal::structural_or_whitespace[c];
+}
+
+// returns a value with the high 16 bits set if not valid
+// otherwise returns the conversion of the 4 hex digits at src into the bottom
+// 16 bits of the 32-bit return register
+//
+// see
+// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
+static inline uint32_t hex_to_u32_nocheck(
+    const uint8_t *src) { // strictly speaking, static inline is a C-ism
+  uint32_t v1 = internal::digit_to_val32[630 + src[0]];
+  uint32_t v2 = internal::digit_to_val32[420 + src[1]];
+  uint32_t v3 = internal::digit_to_val32[210 + src[2]];
+  uint32_t v4 = internal::digit_to_val32[0 + src[3]];
+  return v1 | v2 | v3 | v4;
+}
+
+// given a code point cp, writes to c
+// the utf-8 code, outputting the length in
+// bytes, if the length is zero, the code point
+// is invalid
+//
+// This can possibly be made faster using pdep
+// and clz and table lookups, but JSON documents
+// have few escaped code points, and the following
+// function looks cheap.
+//
+// Note: we assume that surrogates are treated separately
+//
+simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
+  if (cp <= 0x7F) {
+    c[0] = uint8_t(cp);
+    return 1; // ascii
+  }
+  if (cp <= 0x7FF) {
+    c[0] = uint8_t((cp >> 6) + 192);
+    c[1] = uint8_t((cp & 63) + 128);
+    return 2; // universal plane
+    //  Surrogates are treated elsewhere...
+    //} //else if (0xd800 <= cp && cp <= 0xdfff) {
+    //  return 0; // surrogates // could put assert here
+  } else if (cp <= 0xFFFF) {
+    c[0] = uint8_t((cp >> 12) + 224);
+    c[1] = uint8_t(((cp >> 6) & 63) + 128);
+    c[2] = uint8_t((cp & 63) + 128);
+    return 3;
+  } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this
+                               // is not needed
+    c[0] = uint8_t((cp >> 18) + 240);
+    c[1] = uint8_t(((cp >> 12) & 63) + 128);
+    c[2] = uint8_t(((cp >> 6) & 63) + 128);
+    c[3] = uint8_t((cp & 63) + 128);
+    return 4;
+  }
+  // will return 0 when the code point was too large.
+  return 0; // bad r
+}
+
+#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) {
+  return x * (uint64_t)y;
+}
+static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
+  uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
+  uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
+  uint64_t adbc_carry = !!(adbc < ad);
+  uint64_t lo = bd + (adbc << 32);
+  *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
+        (adbc_carry << 32) + !!(lo < bd);
+  return lo;
+}
+#endif
+
+using internal::value128;
+
+// Computes the full 128-bit product of two 64-bit words; the result holds the least
+// significant 64 bits in .low and the most significant 64 bits in .high.
+simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  value128 answer;
+#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
+#ifdef _M_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // _M_ARM64
+#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS)
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace jsoncharutils
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+/* end file include/simdjson/generic/jsoncharutils.h */
+/* begin file include/simdjson/generic/atomparsing.h */
+namespace simdjson {
+namespace fallback {
+namespace {
+/// @private
+namespace atomparsing {
+
+// The string_to_uint32 is exclusively used to map literal strings to 32-bit values.
+// A memcpy is used rather than a pointer cast: the character pointer may not be
+// suitably aligned, and a cast-based load would then be undefined behavior.
+// The copy is not a cost in practice: decent optimizing compilers (GCC, clang,
+// Visual Studio) fold string_to_uint32("false") into the compile-time constant 1936482662.
+simdjson_really_inline uint32_t string_to_uint32(const char* str) {
+  uint32_t packed;
+  std::memcpy(&packed, str, sizeof(packed));
+  return packed;
+}
+
+
+// XORs the four bytes at src with the four bytes of atom; the result is zero exactly
+// when they match. As above, the load goes through memcpy to stay clear of unaligned
+// 32-bit reads, which compilers reduce to roughly a single instruction.
+simdjson_warn_unused
+simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
+  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
+  uint32_t loaded;
+  std::memcpy(&loaded, src, sizeof(loaded));
+  const uint32_t expected = string_to_uint32(atom);
+  return loaded ^ expected;
+}
+
+// "true" followed by a structural or whitespace character.
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "true");
+  const uint32_t bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: the terminator is only inspected when a fifth byte exists.
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return !str4ncmp(src, "true"); }
+  return is_valid_true_atom(src);
+}
+
+// "alse" at src+1 followed by a structural or whitespace character; src[0] is not examined.
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src+1, "alse");
+  const uint32_t bad_terminator = jsoncharutils::is_not_structural_or_whitespace(src[5]);
+  return (mismatch | bad_terminator) == 0;
+}
+
+// Length-aware variant: the terminator is only inspected when a sixth byte exists.
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
+  if (len < 5) { return false; }
+  if (len == 5) { return !str4ncmp(src+1, "alse"); }
+  return is_valid_false_atom(src);
+}
+
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) {
+  return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
+}
+
+simdjson_warn_unused
+simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
+  if (len > 4) { return is_valid_null_atom(src); }
+  else if (len == 4) { return !str4ncmp(src, "null"); }
+  else { return false; }
+}
+
+} // namespace atomparsing
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+/* end file include/simdjson/generic/atomparsing.h */
+/* begin file include/simdjson/fallback/stringparsing.h */
+#ifndef SIMDJSON_FALLBACK_STRINGPARSING_H
+#define SIMDJSON_FALLBACK_STRINGPARSING_H
+
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+// Holds backslashes and quotes locations.
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 1;
+  simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_really_inline bool has_quote_first() { return c == '"'; }
+  simdjson_really_inline bool has_backslash() { return c == '\\'; }
+  simdjson_really_inline int quote_index() { return c == '"' ? 0 : 1; }
+  simdjson_really_inline int backslash_index() { return c == '\\' ? 0 : 1; }
+
+  uint8_t c;
+}; // struct backslash_and_quote
+
+simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  dst[0] = src[0];
+  return { src[0] };
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+/* begin file include/simdjson/generic/stringparsing.h */
+// This file contains the common code every implementation uses
+// It is intended to be included multiple times and compiled multiple times
+
+namespace simdjson {
+namespace fallback {
+namespace {
+/// @private
+namespace stringparsing {
+
+// begin copypasta
+// These chars yield themselves: " \ /
+// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab
+// u not handled in this table as it's complex
+static const uint8_t escape_map[256] = {
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x0.
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0x22, 0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0x2f,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x4.
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0x5c, 0, 0,    0, // 0x5.
+    0, 0, 0x08, 0, 0,    0, 0x0c, 0, 0, 0, 0, 0, 0,    0, 0x0a, 0, // 0x6.
+    0, 0, 0x0d, 0, 0x09, 0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0, // 0x7.
+
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+    0, 0, 0,    0, 0,    0, 0,    0, 0, 0, 0, 0, 0,    0, 0,    0,
+};
+
+// handle a unicode codepoint
+// write appropriate values into dest
+// src will advance 6 bytes or 12 bytes
+// dest will advance a variable amount (return via pointer)
+// return true if the unicode codepoint was valid
+// We work in little-endian then swap at write time
+simdjson_warn_unused
+simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
+                                            uint8_t **dst_ptr) {
+  // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
+  // conversion isn't valid; we defer the check for this to inside the
+  // multilingual plane check
+  uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
+  *src_ptr += 6;
+  // check for low surrogate for characters outside the Basic
+  // Multilingual Plane.
+  if (code_point >= 0xd800 && code_point < 0xdc00) {
+    if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
+      return false;
+    }
+    uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
+
+    // The high surrogate is known to be valid at this point, so only the second
+    // escape needs checking. It must be a low surrogate (0xdc00..0xdfff): any other
+    // value, including invalid hex (which sets the high 16 bits), leaves bits set
+    // above the low 10 after subtracting 0xdc00. Without this range check a pair
+    // such as \ud800\u0041 would silently decode to an unrelated code point.
+    uint32_t low_bits = code_point_2 - 0xdc00;
+    if (low_bits >> 10) {
+      return false;
+    }
+
+    code_point =
+        (((code_point - 0xd800) << 10) | low_bits) + 0x10000;
+    *src_ptr += 6;
+  } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
+    // A low surrogate that is not preceded by a high surrogate cannot be
+    // represented in UTF-8: report an error instead of emitting invalid output.
+    return false;
+  }
+  size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr);
+  *dst_ptr += offset;
+  return offset > 0;
+}
+
+/**
+ * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then
+ * dst needs to have four free bytes.
+ */
+simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) {
+  while (1) {
+    // Copy the next n bytes, and find the backslash and quote in them.
+    auto bs_quote = backslash_and_quote::copy_and_find(src, dst);
+    // If the next thing is the end quote, copy and return
+    if (bs_quote.has_quote_first()) {
+      // we encountered quotes first. Move dst to point to quotes and exit
+      return dst + bs_quote.quote_index();
+    }
+    if (bs_quote.has_backslash()) {
+      /* find out where the backspace is */
+      auto bs_dist = bs_quote.backslash_index();
+      uint8_t escape_char = src[bs_dist + 1];
+      /* we encountered backslash first. Handle backslash */
+      if (escape_char == 'u') {
+        /* move src/dst up to the start; they will be further adjusted
+           within the unicode codepoint handling code. */
+        src += bs_dist;
+        dst += bs_dist;
+        if (!handle_unicode_codepoint(&src, &dst)) {
+          return nullptr;
+        }
+      } else {
+        /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and
+         * write bs_dist+1 characters to output
+         * note this may reach beyond the part of the buffer we've actually
+         * seen. I think this is ok */
+        uint8_t escape_result = escape_map[escape_char];
+        if (escape_result == 0u) {
+          return nullptr; /* bogus escape value is an error */
+        }
+        dst[bs_dist] = escape_result;
+        src += bs_dist + 2;
+        dst += bs_dist + 1;
+      }
+    } else {
+      /* they are the same. Since they can't co-occur, it means we
+       * encountered neither. */
+      src += backslash_and_quote::BYTES_PROCESSED;
+      dst += backslash_and_quote::BYTES_PROCESSED;
+    }
+  }
+  /* can't be reached */
+  return nullptr;
+}
+
+// Unescapes the quoted string at src into the buffer at current_string_buf_loc, sets s
+// to view the unescaped bytes and advances current_string_buf_loc past them.
+// Returns STRING_ERROR when src does not start with a quote or the string is invalid.
+simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *&current_string_buf_loc, std::string_view &s) {
+  if (*(src++) != '"') { return STRING_ERROR; }
+  auto end = stringparsing::parse_string(src, current_string_buf_loc);
+  if (!end) { return STRING_ERROR; }
+  s = std::string_view(reinterpret_cast<const char *>(current_string_buf_loc), end-current_string_buf_loc);
+  current_string_buf_loc = end;
+  return SUCCESS;
+}
+
+} // namespace stringparsing
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+/* end file include/simdjson/generic/stringparsing.h */
+
+#endif // SIMDJSON_FALLBACK_STRINGPARSING_H
+/* end file include/simdjson/fallback/stringparsing.h */
+/* begin file include/simdjson/fallback/numberparsing.h */
+#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_H
+#define SIMDJSON_FALLBACK_NUMBERPARSING_H
+
+#ifdef JSON_TEST_NUMBERS // for unit testing
+void found_invalid_number(const uint8_t *buf);
+void found_integer(int64_t result, const uint8_t *buf);
+void found_unsigned_integer(uint64_t result, const uint8_t *buf);
+void found_float(double result, const uint8_t *buf);
+#endif
+
+namespace simdjson {
+namespace fallback {
+namespace {
+// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
+  uint64_t val;
+  memcpy(&val, chars, sizeof(uint64_t));
+  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+}
+static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars));
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+/* begin file include/simdjson/generic/numberparsing.h */
+#include <limits>
+
+namespace simdjson {
+namespace fallback {
+
+namespace ondemand {
+/**
+ * The type of a JSON number
+ */
+enum class number_type {
+    floating_point_number=1, /// a binary64 number
+    signed_integer,          /// a signed integer that fits in a 64-bit word using two's complement
+    unsigned_integer         /// a positive integer larger or equal to 1<<63
+};
+}
+
+namespace {
+/// @private
+namespace numberparsing {
+
+
+
+#ifdef JSON_TEST_NUMBERS
+#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
+#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
+#else
+#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
+#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
+#endif
+
+namespace {
+// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
+// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
+// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed.
+simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
+    double d;
+    mantissa &= ~(1ULL << 52);
+    mantissa |= real_exponent << 52;
+    mantissa |= ((static_cast<uint64_t>(negative)) << 63);
+    std::memcpy(&d, &mantissa, sizeof(d));
+    return d;
+}
+}
+// Attempts to compute i * 10^(power) exactly; and if "negative" is
+// true, negate the result.
+// This function will only work in some cases, when it does not work, success is
This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. 
+ // If you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are falling back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximately equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. 
+ // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. 
Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. 
+ /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here we have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 64 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. 
+ // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11} or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. 
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an inconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting it into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 has overflowed if the accumulated value is less than or equal to INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 has overflowed if the accumulated value is less than or equal to INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 has overflowed if the accumulated value is less than or equal to INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup; inside a string, however, the digits must be followed by the closing quote: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 has overflowed if the accumulated value is less than or equal to INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it.
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_H +/* end file include/simdjson/fallback/numberparsing.h */ +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_FALLBACK +#endif // SIMDJSON_FALLBACK_H +/* end file include/simdjson/fallback.h */ +/* begin file include/simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + + +#if SIMDJSON_IMPLEMENTATION_ICELAKE + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_TARGET_ICELAKE +#define SIMDJSON_UNTARGET_ICELAKE +#else +#define SIMDJSON_TARGET_ICELAKE 
SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt") +#define SIMDJSON_UNTARGET_ICELAKE SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { +} // namespace icelake +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_ICELAKE +// +/* begin file include/simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +using namespace simdjson; + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file include/simdjson/icelake/implementation.h */ +/* begin file include/simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + + +#ifdef SIMDJSON_VISUAL_STUDIO +// 
under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. 
+#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file include/simdjson/icelake/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused 
error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +namespace icelake { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/icelake/bitmanipulation.h */ +#ifndef 
SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file include/simdjson/icelake/bitmanipulation.h */ +/* begin file 
include/simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file include/simdjson/icelake/bitmask.h */ +/* begin file include/simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + + + + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_really_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_really_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_really_inline operator const __m512i&() const { return this->value; } + simdjson_really_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_really_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_really_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_really_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_really_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_really_inline base8() : base>() {} + simdjson_really_inline base8(const __m512i _value) : base>(_value) {} + + simdjson_really_inline uint64_t operator==(const simd8 other) const { return _mm512_cmpeq_epi8_mask(*this, other); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { +#if SIMDJSON_GCC8 + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); +#else + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), 16 - N); +#endif + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_really_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_really_inline simd8() : base8() {} + simdjson_really_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_really_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_really_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_really_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_really_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_really_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_really_inline simd8 repeat_16( + T v0, T 
v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_really_inline base8_numeric() : base8() {} + simdjson_really_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_really_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. 
+ // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_really_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_really_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, 
int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline 
simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + 
simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_really_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_really_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_really_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_really_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_really_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_really_inline bool 
any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_really_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_really_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_really_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_really_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_really_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_really_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_really_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_really_inline uint64_t eq(const T m) 
const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file include/simdjson/icelake/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function 
looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + 
answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/icelake/stringparsing.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_H +#define SIMDJSON_ICELAKE_STRINGPARSING_H + + 
+namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +/* begin file include/simdjson/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. 
If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. + if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then + * dst needs to have four free bytes. + */ +simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. 
I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { + if (*(src++) != '"') { return STRING_ERROR; } + auto end = stringparsing::parse_string(src, current_string_buf_loc); + if (!end) { return STRING_ERROR; } + s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); + current_string_buf_loc = end; + return SUCCESS; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/stringparsing.h */ + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_H +/* end file include/simdjson/icelake/stringparsing.h */ +/* begin file include/simdjson/icelake/numberparsing.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_H + +namespace simdjson { +namespace icelake { +namespace { + +static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace icelake { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). 
+ // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
+ // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
+ // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. 
+ error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. 
+ if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
+template<typename W>
+simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) {
+  // Parses the JSON number at src and emits it through writer: WRITE_INTEGER / WRITE_UNSIGNED for integers, write_float for floats.
+  // Returns SUCCESS, INVALID_NUMBER(src) for malformed or out-of-range text, or (presumably) the error a helper reported via SIMDJSON_TRY.
+  // Check for minus sign
+  //
+  bool negative = (*src == '-');
+  const uint8_t *p = src + negative;
+
+  //
+  // Parse the integer part.
+  // Note: i may wrap around uint64_t in this loop; the digit-count checks further down detect that.
+  // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
+  const uint8_t *const start_digits = p;
+  uint64_t i = 0;
+  while (parse_digit(*p, i)) { p++; }
+
+  // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
+  // Optimization note: size_t is expected to be unsigned.
+  size_t digit_count = size_t(p - start_digits);
+  if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); }
+
+  //
+  // Handle floats if there is a . or e (or both)
+  //
+  int64_t exponent = 0;
+  bool is_float = false;
+  if ('.' == *p) {
+    is_float = true;
+    ++p;
+    SIMDJSON_TRY( parse_decimal(src, p, i, exponent) );
+    digit_count = size_t(p - start_digits); // used later to guard against overflows (this count includes the '.')
+  }
+  if (('e' == *p) || ('E' == *p)) {
+    is_float = true;
+    ++p;
+    SIMDJSON_TRY( parse_exponent(src, p, exponent) );
+  }
+  if (is_float) {
+    const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p);
+    SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
+    if (dirty_end) { return INVALID_NUMBER(src); }
+    return SUCCESS;
+  }
+
+  // The longest negative 64-bit number is 19 digits.
+  // The longest positive 64-bit number is 20 digits.
+  // We do it this way so we don't trigger this branch unless we must.
+  size_t longest_digit_count = negative ? 19 : 20;
+  if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); }
+  if (digit_count == longest_digit_count) {
+    if (negative) {
+      // Anything negative above INT64_MAX+1 is invalid
+      if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); }
+      WRITE_INTEGER(~i+1, src, writer);
+      if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
+      return SUCCESS;
+    // Positive overflow check (this comment documents the `else if` just below):
+    // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
+    //   biggest uint64_t.
+    // - A 20 digit number starting with 1 may or may not fit in a uint64_t, so we inspect the
+    //   value that was accumulated in i to find out whether the accumulation wrapped around.
+    // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
+    //   than 1,553,255,926,290,448,384.
+    // - That is smaller than the smallest possible 20-digit number the user could write:
+    //   10,000,000,000,000,000,000.
+    // - Therefore, if i is lower than that, the accumulation wrapped around: it's overflow.
+    // - Testing i <= INT64_MAX is enough: 1,553,255,926,290,448,384 < INT64_MAX < 10,000,000,000,000,000,000.
+    //
+    } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); }
+  }
+
+  // Write unsigned if it doesn't fit in a signed integer.
+  if (i > uint64_t(INT64_MAX)) {
+    WRITE_UNSIGNED(i, src, writer);
+  } else {
+    WRITE_INTEGER(negative ? (~i+1) : i, src, writer);
+  }
+  if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); }
+  return SUCCESS;
+}
+
+// Inlineable functions
+namespace {
+
+// This table can be used to characterize the final character of an integer
+// string. For JSON structural character and allowable white space characters,
+// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise
+// we return NUMBER_ERROR.
+// Optimization note: we could easily reduce the size of the table by half (to 128)
+// at the cost of an extra branch.
+// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. 
+ // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. 
+ if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? 
(~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_H +/* end file include/simdjson/icelake/numberparsing.h */ +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* end file include/simdjson/icelake/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_ICELAKE +#endif // SIMDJSON_ICELAKE_H +/* end file include/simdjson/icelake.h */ +/* begin file include/simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + + +#if SIMDJSON_IMPLEMENTATION_HASWELL + +#if SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_TARGET_HASWELL +#define SIMDJSON_UNTARGET_HASWELL +#else +#define SIMDJSON_TARGET_HASWELL 
SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt") +#define SIMDJSON_UNTARGET_HASWELL SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { +} // namespace haswell +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_HASWELL +// +/* begin file include/simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +using namespace simdjson; + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file include/simdjson/haswell/implementation.h */ +/* begin file include/simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + + +#ifdef SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. 
Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file include/simdjson/haswell/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file include/simdjson/haswell/bitmanipulation.h */ +/* begin file include/simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file include/simdjson/haswell/bitmask.h */ +/* begin file include/simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_really_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_really_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_really_inline operator const __m256i&() const { return this->value; } + simdjson_really_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + 
simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_really_inline base8() : base>() {} + simdjson_really_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_really_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_really_inline simd8() : base8() {} + simdjson_really_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_really_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_really_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_really_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_really_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_really_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T 
v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_really_inline base8_numeric() : base8() {} + simdjson_really_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_really_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. 
+ // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_really_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // 
Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + 
)) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & 
bits).bits_not_set(); } + simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_really_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_really_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_really_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_really_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_really_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_really_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + 
this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file include/simdjson/haswell/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. 
+// +// Note: we assume that surrogates are treated separately +// +simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = 
__umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/haswell/stringparsing.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_H +#define SIMDJSON_HASWELL_STRINGPARSING_H + + 
+namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +/* begin file include/simdjson/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. 
If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. + if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then + * dst needs to have four free bytes. + */ +simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. 
I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { + if (*(src++) != '"') { return STRING_ERROR; } + auto end = stringparsing::parse_string(src, current_string_buf_loc); + if (!end) { return STRING_ERROR; } + s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); + current_string_buf_loc = end; + return SUCCESS; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/stringparsing.h */ + +#endif // SIMDJSON_HASWELL_STRINGPARSING_H +/* end file include/simdjson/haswell/stringparsing.h */ +/* begin file include/simdjson/haswell/numberparsing.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_H +#define SIMDJSON_HASWELL_NUMBERPARSING_H + +namespace simdjson { +namespace haswell { +namespace { + +static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace haswell { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). 
+ // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
+ // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
+ // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. 
+ error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. 
+ if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
+template +simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 
19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. 
+// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. 
+ // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. 
+ if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? 
(~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 in the range [src, src_end). +// Never read at src_end or beyond; an empty range is reported as NUMBER_ERROR. +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup (skipped when the digits run up to src_end, which is accepted as the end of the number): + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to -INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// src is expected to point at the opening quote of a string: parsing starts at src + 1 +// and the digits must be followed by a closing quote. +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // The digits must be immediately followed by the closing quote; anything else + // (including '.', 'e' or 'E') is reported as NUMBER_ERROR. + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers can go down to -INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse the JSON number starting at src as a double. +// Returns INCORRECT_TYPE when there are no integer digits and NUMBER_ERROR when the number is malformed. +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + // Only the digits after the leading zeros contribute to i, so count those. + // Counting the skipped prefix instead would send a wrapped-around i to the fast path. + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Parse the JSON number in the range [src, src_end) as a double. +// Returns INCORRECT_TYPE when there are no integer digits and NUMBER_ERROR when the number is malformed or the range is empty. +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + // Stop at p: when the digits run up to src_end, *p must not be read. + while ((start_digits != p) && (*start_digits == '0')) { start_digits++; } + // Only the digits after the leading zeros contribute to i, so count those. + // Counting the skipped prefix instead would send a wrapped-around i to the fast path. + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + // Only the digits after the leading zeros contribute to i, so count those. + // Counting the skipped prefix instead would send a wrapped-around i to the fast path. + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + // The number must be immediately followed by the closing quote. + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_H +/* end file include/simdjson/haswell/numberparsing.h */ +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_HASWELL +#endif // SIMDJSON_HASWELL_COMMON_H +/* end file include/simdjson/haswell.h */ +/* begin file include/simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + + +#if SIMDJSON_IMPLEMENTATION_PPC64 + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + + +namespace simdjson { +namespace ppc64 { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} // namespace + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file include/simdjson/ppc64/implementation.h */ + +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file include/simdjson/ppc64/intrinsics.h */ +/* begin file include/simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zeroes on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* clears the lowest set bit; a zero input yields zero (unsigned arithmetic wraps around) */ +simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_really_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_really_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file include/simdjson/ppc64/bitmanipulation.h */ +/* begin file include/simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+ // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file include/simdjson/ppc64/bitmask.h */ +/* begin file include/simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_really_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_really_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_really_inline operator const __m128i &() const { + return this->value; + } + simdjson_really_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_really_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_really_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_really_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_really_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_really_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_really_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast 
& other; + return *this_cast; + } + simdjson_really_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. +template struct simd8; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_really_inline base8() : base>() {} + simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_really_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_really_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_really_inline simd8() : base8() {} + simdjson_really_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_really_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_really_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + 
simdjson_really_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_really_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_really_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_really_inline simd8 zero() { return splat(0); } + static simdjson_really_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_really_inline base8_numeric() : base8() {} + simdjson_really_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_really_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_really_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_really_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_really_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_really_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + 
simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_really_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_really_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t 
v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_really_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_really_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_really_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_really_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t 
v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_really_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_really_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_really_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_really_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_really_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_really_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_really_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_really_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_really_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_really_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_really_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_really_inline simd8 any_bits_set(simd8 bits) const { + 
return ~this->bits_not_set(bits); + } + simdjson_really_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_really_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_really_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_really_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_really_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_really_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_really_inline uint64_t compress(uint64_t 
mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file include/simdjson/ppc64/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return 
internal::structural_or_whitespace_negated[c]; +} + +simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r 
>> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/ppc64/stringparsing.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_H +#define SIMDJSON_PPC64_STRINGPARSING_H + + +namespace 
simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_really_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_really_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_really_inline bool has_backslash() { return bs_bits != 0; } + simdjson_really_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_really_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_really_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. 
+ uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. 
+ if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then + * dst needs to have four free bytes. + */ +simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { + if (*(src++) != '"') { return STRING_ERROR; } + auto end = stringparsing::parse_string(src, current_string_buf_loc); + if (!end) { return STRING_ERROR; } + s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); + current_string_buf_loc = end; + return SUCCESS; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/stringparsing.h */ + +#endif // SIMDJSON_PPC64_STRINGPARSING_H +/* end file include/simdjson/ppc64/stringparsing.h */ +/* begin file include/simdjson/ppc64/numberparsing.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_H +#define SIMDJSON_PPC64_NUMBERPARSING_H + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static simdjson_really_inline uint32_t +parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace ppc64 { 
+ +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). 
+// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. 
+ // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. 
+ // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. 
Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. 
+ /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. 
+ // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. 
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_PPC64_NUMBERPARSING_H +/* end file include/simdjson/ppc64/numberparsing.h */ +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_PPC64 + +#endif // SIMDJSON_PPC64_H +/* end file include/simdjson/ppc64.h */ +/* begin file include/simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + + +#if SIMDJSON_IMPLEMENTATION_WESTMERE + +#if SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_TARGET_WESTMERE +#define SIMDJSON_UNTARGET_WESTMERE +#else +#define SIMDJSON_TARGET_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul") +#define 
SIMDJSON_UNTARGET_WESTMERE SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { +} // namespace westmere +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_WESTMERE +// +/* begin file include/simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file include/simdjson/westmere/implementation.h */ +/* begin file include/simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +#ifdef SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + + + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file include/simdjson/westmere/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) 
noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson 
+/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_really_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_really_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file include/simdjson/westmere/bitmanipulation.h */ +/* begin file include/simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. 
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file include/simdjson/westmere/bitmask.h */ +/* begin file include/simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_really_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_really_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_really_inline operator const __m128i&() const { return this->value; } + simdjson_really_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_really_inline base8() : base>() {} + simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_really_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_really_inline simd8() : base8() {} + simdjson_really_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_really_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_really_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_really_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_really_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_really_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_really_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_really_inline base8_numeric() : base8() {} + simdjson_really_inline base8_numeric(const __m128i 
_value) : base8(_value) {} + + // Store to array + simdjson_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_really_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_really_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_really_inline simd8 max_val(const simd8 other) const { return 
_mm_max_epi8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_really_inline simd8() : base8_numeric() {} + simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_really_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_really_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_really_inline simd8 min_val(const simd8 other) const { 
return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_really_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_really_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_really_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } 
+ // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_really_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_really_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_really_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t 
r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_really_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_really_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file include/simdjson/westmere/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + 
uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. 
+// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + 
+simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/westmere/stringparsing.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H +#define SIMDJSON_WESTMERE_STRINGPARSING_H + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_really_inline bool has_backslash() { return bs_bits != 0; } + simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == 
'"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +/* begin file include/simdjson/generic/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // check for low surrogate for characters outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + + // if the first code point is invalid we will get here, as we will go past + // the check for being outside the Basic Multilingual plane. If we don't + // find a \u immediately afterwards we fail out anyhow, but if we do, + // this check catches both the case of the first code point being invalid + // or the second code point being invalid. 
+ if ((code_point | code_point_2) >> 16) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; + *src_ptr += 6; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then + * dst needs to have four free bytes. + */ +simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { + if (*(src++) != '"') { return STRING_ERROR; } + auto end = stringparsing::parse_string(src, current_string_buf_loc); + if (!end) { return STRING_ERROR; } + s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); + current_string_buf_loc = end; + return SUCCESS; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/stringparsing.h */ + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_H +/* end file include/simdjson/westmere/stringparsing.h */ +/* begin file include/simdjson/westmere/numberparsing.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_H + +namespace simdjson { +namespace westmere { +namespace { + +static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace westmere { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). 
+ // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
+ // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
+ // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +template +simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. 
+ error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + WRITE_DOUBLE(0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. 
+ if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
+template +simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 
19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. 
+// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. 
+ // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. 
+ if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? 
(~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + negative; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + const uint8_t *p = src + negative + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*p != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += negative; + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += negative; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += negative + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src-negative, &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_H +/* end file include/simdjson/westmere/numberparsing.h */ +/* begin file include/simdjson/westmere/end.h */ +SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_WESTMERE +#endif // SIMDJSON_WESTMERE_COMMON_H +/* end file include/simdjson/westmere.h */ + +// Builtin implementation + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_IMPLEMENTATIONS_H +/* end file include/simdjson/implementations.h */ + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define 
SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." +#endif +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +// redefining SIMDJSON_IMPLEMENTATION to "SIMDJSON_BUILTIN_IMPLEMENTATION" +// #define SIMDJSON_IMPLEMENTATION SIMDJSON_BUILTIN_IMPLEMENTATION + +// ondemand is only compiled as part of the builtin implementation at present + +// Interface declarations +/* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. 
+ * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_really_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_really_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_really_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_really_inline error_code get(T &value) && noexcept; + + /** + * The error. 
+ */ + simdjson_really_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_really_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_really_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/implementation_simdjson_result_base.h */ +/* begin file include/simdjson/generic/ondemand.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it.
+ * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +/* begin file include/simdjson/generic/ondemand/json_type.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b": 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +class value_iterator; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_really_inline number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_really_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true.
+ */ + simdjson_really_inline uint64_t get_uint64() const noexcept; + simdjson_really_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_really_inline bool is_int64() const noexcept; + /** + * return the value as an int64_t, only valid if is_int64() is true. + */ + simdjson_really_inline int64_t get_int64() const noexcept; + simdjson_really_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_really_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_really_inline double get_double() const noexcept; + simdjson_really_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_really_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + /** Store a signed 64-bit value to the number. */ + simdjson_really_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number.
*/ + simdjson_really_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_really_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_really_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_type.h */ +/* begin file include/simdjson/generic/ondemand/token_position.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_position.h */ +/* begin file include/simdjson/generic/ondemand/logger.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class json_iterator; +class value_iterator; + +namespace logger { + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). 
+static inline void log_headers() noexcept; +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const 
char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/logger.h */ +/* begin file include/simdjson/generic/ondemand/raw_json_string.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class object; +class parser; +class json_iterator; + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is pretty much all you can do. + * + * They originate typically from field instance which in turn represent key-value pairs from + * object instances. From a field instance, you get the raw_json_string instance by calling key(). + * You can, if you want a more usable string_view instance, call the unescaped_key() method + * on the field instance. + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. 
+ */ + simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_really_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_really_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... 
+ * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_really_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_really_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_really_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_really_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_really_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_really_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_really_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. 
+ */ + simdjson_really_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. + * dst will be updated to the next unused location (just after the \0 written out at + * the end of this string). + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; + + const uint8_t * buf{}; + friend class object; + friend class field; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_really_inline simdjson_result raw() const noexcept; + simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; + simdjson_really_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/raw_json_string.h */ +/* begin file include/simdjson/generic/ondemand/token_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_really_inline token_iterator() noexcept = default; + simdjson_really_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_really_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_really_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. 
+ * + * @param position The position of the token. + */ + simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_really_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_really_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_really_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_really_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_iterator.h */ +/* begin file include/simdjson/generic/ondemand/json_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class document; +class document_stream; +class object; +class array; +class value; +class raw_json_string; +class parser; + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. 
+ */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_really_inline json_iterator() noexcept = default; + simdjson_really_inline json_iterator(json_iterator &&other) noexcept; + simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_really_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. 
+ */ + simdjson_warn_unused simdjson_really_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_really_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_really_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_really_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_really_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_really_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_really_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_really_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_really_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_really_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). 
+ * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_really_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. 
+ * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_really_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_really_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_really_inline void descend_to(depth_t child_depth) noexcept; + simdjson_really_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_really_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_really_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. 
+ */ + simdjson_really_inline error_code optional_error(error_code error, const char *message) noexcept; + + template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; + + simdjson_really_inline token_position position() const noexcept; + simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + simdjson_really_inline token_position start_position(depth_t depth) const noexcept; + simdjson_really_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; +protected: + simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_really_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_really_inline token_position end_position() const noexcept; + /// The end of the buffer. 
+ simdjson_really_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_really_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_iterator.h */ +/* begin file include/simdjson/generic/ondemand/value_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class document; +class object; +class array; +class value; +class raw_json_string; +class parser; + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. 
+ */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_really_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_really_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_really_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_really_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_really_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_really_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_really_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_really_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_really_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_really_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. 
+ * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_really_inline simdjson_result start_root_object() noexcept; + + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. 
+ */ + simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_really_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_really_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. 
+ * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; + + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete).
+ */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. 
+ */ + simdjson_warn_unused simdjson_really_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; + simdjson_warn_unused simdjson_really_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result 
get_root_double() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double_in_string() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; + simdjson_warn_unused simdjson_really_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result is_root_integer() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number_type() noexcept; + simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number() noexcept; + simdjson_really_inline bool is_root_null() noexcept; + + simdjson_really_inline error_code error() const noexcept; + simdjson_really_inline uint8_t *&string_buf_loc() noexcept; + simdjson_really_inline const json_iterator &json_iter() const noexcept; + simdjson_really_inline json_iterator &json_iter() noexcept; + + simdjson_really_inline void assert_is_valid() const noexcept; + simdjson_really_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_really_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_really_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_really_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). 
+ **/ + simdjson_really_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_really_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; + simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_really_inline const uint8_t *peek_start() const noexcept; + simdjson_really_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have the desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only ever move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone.
+ */ + + simdjson_really_inline void advance_scalar(const char *type) noexcept; + simdjson_really_inline void advance_root_scalar(const char *type) noexcept; + simdjson_really_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_really_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_really_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_really_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_really_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_really_inline simdjson_result advance_to_value() noexcept; + + simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_really_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_really_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. 
+ */ + simdjson_really_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_really_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_really_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::last_position() const noexcept; */ + simdjson_really_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_really_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value_iterator.h */ +/* begin file include/simdjson/generic/ondemand/array_iterator.h */ + +namespace simdjson { +namespace
SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class value; +class document; + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_really_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_really_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_really_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. 
+ */ + simdjson_really_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_really_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_really_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array_iterator.h */ +/* begin file include/simdjson/generic/ondemand/object_iterator.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class field; + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_really_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. 
+ simdjson_really_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_really_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_really_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_really_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_really_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object_iterator.h */ +/* begin file include/simdjson/generic/ondemand/array.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class value; +class document; + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_really_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_really_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. 
After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. 
+ * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_really_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. 
+ * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given iterator. Internal array creation: call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_really_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + simdjson_really_inline simdjson_result begin() noexcept; + simdjson_really_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_really_inline simdjson_result
at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array.h */ +/* begin file include/simdjson/generic/ondemand/document.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class parser; +class array; +class object; +class value; +class raw_json_string; +class array_iterator; +class document_stream; + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline document() noexcept = default; + simdjson_really_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_really_inline document(document &&other) noexcept = default; + simdjson_really_inline document &operator=(const document &other) noexcept = delete; + simdjson_really_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_really_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_really_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer.
+ */ + simdjson_really_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_really_inline simdjson_result get_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string.
+ * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_really_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_really_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. + * + * @returns Whether the value is null. + */ + simdjson_really_inline bool is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_really_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_really_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type.
+ * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_really_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_really_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_really_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_really_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number.
+ */ + simdjson_really_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_really_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_really_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_really_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_really_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_really_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_really_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. 
`object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. 
Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. 
and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_really_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_really_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type().
+ * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater than or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_really_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token.
+ * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_really_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_really_inline error_code consume() noexcept; + + simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_really_inline value_iterator resume_value_iterator() noexcept; + simdjson_really_inline value_iterator get_root_value_iterator() noexcept; + simdjson_really_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_really_inline document_reference() noexcept; + simdjson_really_inline document_reference(document &d) noexcept; + simdjson_really_inline document_reference(const document_reference &other) noexcept = default; + simdjson_really_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_really_inline void rewind() noexcept; + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() 
noexcept; + + simdjson_really_inline bool is_null() noexcept; + simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_really_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator array() & noexcept(false); + simdjson_really_inline operator object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator value() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() 
noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline error_code rewind() noexcept; + + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_double_from_string() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_really_inline bool is_null() noexcept; + + template simdjson_really_inline simdjson_result get() & noexcept; + template simdjson_really_inline simdjson_result get() && noexcept; + + template simdjson_really_inline error_code get(T &out) & noexcept; + template simdjson_really_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; 
+ simdjson_really_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_really_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; + simdjson_really_inline simdjson_result() noexcept = default; + simdjson_really_inline error_code rewind() noexcept; + + simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() 
noexcept(false); + simdjson_really_inline operator bool() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_really_inline bool is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_really_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document.h */ +/* begin file include/simdjson/generic/ondemand/value.h */ + +namespace simdjson { 
+namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class document; +class field; +class object; +class raw_json_string; + +/** + * An ephemeral JSON value returned during iteration. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_really_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_really_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_really_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. 
+ * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_really_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. 
The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_really_inline simdjson_result get_string() noexcept; + + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_really_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. + * + * @returns Whether the value is null. + */ + simdjson_really_inline bool is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_really_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_really_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_really_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer.
+ */ + simdjson_really_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_really_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_really_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_really_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_really_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_really_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_really_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. 
+ * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_really_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + */ + simdjson_really_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_really_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
+ * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_really_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+ */ + simdjson_really_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_really_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_really_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater than or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_really_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. 
However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_really_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_really_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_really_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. 
+ * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_really_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_really_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_really_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_really_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_really_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_really_inline void log_value(const char *type) const noexcept; + // simdjson_really_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + simdjson_really_inline simdjson_result get_array() noexcept; + simdjson_really_inline simdjson_result get_object() noexcept; + + simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_really_inline bool is_null() noexcept; + + template simdjson_really_inline simdjson_result get() noexcept; + + template 
simdjson_really_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); + simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_really_inline operator int64_t() noexcept(false); + simdjson_really_inline operator double() noexcept(false); + simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_really_inline operator bool() noexcept(false); +#endif + simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_really_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_really_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + */ + simdjson_really_inline simdjson_result type() noexcept; + simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_really_inline simdjson_result is_negative() noexcept; + simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_really_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_really_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_really_inline simdjson_result current_location() noexcept */ + simdjson_really_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_really_inline int32_t current_depth() const noexcept */ + simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value.h */ +/* begin 
file include/simdjson/generic/ondemand/field.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_really_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_really_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
+ */ + simdjson_really_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_really_inline ondemand::value value() && noexcept; + +protected: + simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_really_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_really_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + simdjson_really_inline simdjson_result unescaped_key() noexcept; + simdjson_really_inline simdjson_result key() noexcept; + simdjson_really_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/field.h */ +/* begin file include/simdjson/generic/ondemand/object.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_really_inline object() noexcept = default; + + simdjson_really_inline simdjson_result begin() noexcept; + simdjson_really_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). 
+ * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. 
+ * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. 
+ * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_really_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_really_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_really_inline error_code consume() noexcept; + static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_really_inline object resume(const value_iterator &iter) noexcept; + simdjson_really_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_really_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; + + simdjson_really_inline simdjson_result begin() noexcept; + simdjson_really_inline simdjson_result end() noexcept; + 
simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object.h */ +/* begin file include/simdjson/generic/ondemand/parser.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class object; +class value; +class raw_json_string; +class document_stream; + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. 
+ */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_really_inline parser(const parser &other) = delete; + simdjson_really_inline parser &operator=(const parser &other) = delete; + simdjson_really_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * @param json The JSON to parse. + * @param len The length of the JSON. 
+ * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. 
+ * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. 
+ * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. 
The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). 
*/ + simdjson_really_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_really_inline size_t max_capacity() const noexcept; + simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; + /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ + simdjson_really_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. 
+ */ + bool threaded{true}; + #endif + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/parser.h */ +/* begin file include/simdjson/generic/ondemand/document_stream.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class parser; +class json_iterator; +class document; + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. 
+ */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_really_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_really_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_really_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_really_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_really_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_really_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. 
+ * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_really_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_really_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_really_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_really_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. 
*/ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_really_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document_stream.h */ +/* begin file include/simdjson/generic/ondemand/serialization.h */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. 
The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { + +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand +/* end file include/simdjson/generic/ondemand/serialization.h */ +/* end file include/simdjson/generic/ondemand.h */ + +// Inline definitions +/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template 
+simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_really_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +/* begin file include/simdjson/generic/ondemand-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_type-inl.h */ 
+namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_really_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_really_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_really_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_really_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_really_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_really_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_really_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_really_inline bool 
number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_really_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_really_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_really_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_really_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_really_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_really_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_really_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_type-inl.h */ +/* begin file include/simdjson/generic/ondemand/logger-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = 
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. + +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept 
{ + log_line(iter, "ERROR: ", error, detail, delta, depth_delta); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + // Technically a static variable is not thread-safe, but if you are using threads + // and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if(!displayed_hint) { + // We only print this helpful header once. 
+ printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf("# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN+2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN+2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5+2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } +} + +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, 
std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); +} +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { + if (LOG_ENABLED) { + const int indent = depth*2; + const auto buf = iter.token.buf; + printf("| %*s%s%-*s ", + indent, "", + title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title + ); + { + // Print the current structural. + printf("| "); + auto current_structural = &buf[*index]; + for (int i=0;i(buf); } +simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { + uint8_t *end = stringparsing::parse_string(buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. 
+ bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. 
+ return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { + return unescape(iter.string_buf_loc()); +} + + +simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_really_inline simdjson_warn_unused simdjson_result 
simdjson_result::unescape(uint8_t *&dst) const noexcept { + if (error()) { return error(); } + return first.unescape(dst); +} +simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape(iter); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ +/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_really_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_really_inline void 
token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = 
nullptr; +} +simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." 
+ // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_really_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_really_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_really_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_really_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_really_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_really_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_really_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_really_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_really_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + 
+simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_really_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(0); +} + +simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_really_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. 
+ uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_really_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_really_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_really_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const 
noexcept { + return parser->start_positions[depth]; +} + +simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + parser->start_positions[depth] = position; +} + +#endif + + +simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + +template +simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Truncate whitespace to fit the buffer. + if (max_len > N-1) { + // if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } + max_len = N-1; + } + + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + tmpbuf[max_len] = ' '; + return true; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/value_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_really_inline 
simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if (! 
_json_iter->streaming() && (*_json_iter->peek_last() != '}')) { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + return started_object(); +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. 
+ // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). 
+ //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. 
+ token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). 
+ if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). 
+ if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. 
+ // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. 
+ error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + 
_json_iter->descend_to(depth()+1); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() && (*_json_iter->peek_last() != ']')) { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + return started_array(); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { + return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { + return get_raw_json_string().unescape(_json_iter->string_buf_loc()); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { + auto result 
= numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_really_inline bool value_iterator::is_null() noexcept { + auto result = parse_null(peek_non_root_scalar("null")); + if(result) { advance_non_root_scalar("null"); } + return result; +} +simdjson_really_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_really_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_really_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_really_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_really_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_really_inline simdjson_result value_iterator::is_root_integer() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if 
(!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + return numberparsing::is_integer(tmpbuf); +} + +simdjson_really_inline simdjson_result value_iterator::get_root_number_type() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + return numberparsing::get_number_type(tmpbuf); +} +simdjson_really_inline simdjson_result value_iterator::get_root_number() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. 
+ uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + advance_root_scalar("number"); + return num; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { + return get_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { + return get_raw_json_string(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { + auto max_len = 
peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. 
+ uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("double"); } + return result; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { advance_root_scalar("bool"); } + return result; +} +simdjson_really_inline bool value_iterator::is_root_null() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(result) { advance_root_scalar("null"); } + return result; +} + +simdjson_warn_unused simdjson_really_inline 
error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_really_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_really_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_really_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_really_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_really_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_really_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_really_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_really_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_iter() 
noexcept { + return *_json_iter; +} + +simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. 
Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_really_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_really_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_really_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_really_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_really_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. 
+ // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_really_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_really_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { + move_at_container_start(); + return started_array(); +} + +simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void 
value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_really_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_really_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_really_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_really_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_really_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_really_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_really_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline 
simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value +) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// object_iterator +// + +simdjson_really_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { + 
error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
+// + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for '}' and ',' +simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true.
+// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. 
+// + +simdjson_really_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_really_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_really_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_really_inline simdjson_result array::begin() noexcept { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_really_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_really_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_really_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} + +simdjson_really_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_really_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return 
error(); } + return first.is_empty(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array-inl.h */ +/* begin file include/simdjson/generic/ondemand/document-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_really_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() noexcept { + return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_really_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. 
+ iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': + case '{': + return value(get_root_value_iterator()); + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + // return value(get_root_value_iterator()); + } +} +simdjson_really_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_really_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} +simdjson_really_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(); +} +simdjson_really_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(); +} +simdjson_really_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(); +} +simdjson_really_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(); +} +simdjson_really_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(); +} +simdjson_really_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(); +} +simdjson_really_inline simdjson_result document::get_string() noexcept { + return get_root_value_iterator().get_root_string(); +} +simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(); +} +simdjson_really_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(); +} +simdjson_really_inline bool document::is_null() noexcept { + return 
get_root_value_iterator().is_root_null(); +} + +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_really_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_really_inline error_code document::get(T &out) && noexcept { + return 
std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } +simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_really_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_really_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { + iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ + iter.assert_at_document_depth(); + } + return answer; +} +simdjson_really_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an array, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { + iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ + iter.assert_at_document_depth(); + } + return answer; +} +simdjson_really_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_really_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_really_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_really_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_really_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_really_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_really_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_really_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_really_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(); +} + +simdjson_really_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(); +} + +simdjson_really_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(); +} + + +simdjson_really_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return 
first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template 
+simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_really_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw 
simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + 
return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_really_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_really_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_really_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_really_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +simdjson_really_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } +simdjson_really_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } +simdjson_really_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } +simdjson_really_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } +simdjson_really_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } +simdjson_really_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } +simdjson_really_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_really_inline bool document_reference::is_null() noexcept { return doc->is_null(); } + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_really_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_really_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } +simdjson_really_inline document_reference::operator int64_t() noexcept(false) { return 
int64_t(*doc); } +simdjson_really_inline document_reference::operator double() noexcept(false) { return double(*doc); } +simdjson_really_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_really_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_really_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } +simdjson_really_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_really_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_really_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_really_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_really_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_really_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_really_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_really_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_really_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_really_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_really_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_really_inline simdjson_result 
document_reference::type() noexcept { return doc->type(); } +simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_really_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_really_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } +simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } +simdjson_really_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } +simdjson_really_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_really_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_really_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_really_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + + + +namespace simdjson { +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return 
first.count_fields(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return 
error(); } + return first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_really_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline 
simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); 
+} + + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document-inl.h */ +/* begin file include/simdjson/generic/ondemand/value-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_really_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_really_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_really_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_really_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_really_inline simdjson_result value::get_string() noexcept { + return iter.get_string(); +} +simdjson_really_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_really_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_really_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_really_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_really_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_really_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_really_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_really_inline bool value::is_null() noexcept { + return 
iter.is_null(); +} + +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_really_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_really_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_really_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_really_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_really_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_really_inline value::operator std::string_view() noexcept(false) { + return get_string(); +} +simdjson_really_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_really_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_really_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_really_inline simdjson_result value::end() & noexcept { + return {}; +} 
+simdjson_really_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_really_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_really_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_really_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_really_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_really_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_really_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_really_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_really_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_really_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_really_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_really_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_really_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_really_inline simdjson_result 
simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return 
first.get_uint64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_really_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_really_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_really_inline bool simdjson_result::is_null() noexcept { + if (error()) { return false; } + return first.is_null(); +} + +template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_really_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_really_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { + if (error()) { 
return error(); } + out = first; + return SUCCESS; +} + +simdjson_really_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_really_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_really_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value-inl.h */ +/* begin file include/simdjson/generic/ondemand/field-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_really_inline field::field() noexcept : std::pair() {} + +simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_really_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_really_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_really_inline simdjson_warn_unused simdjson_result 
field::unescaped_key() noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.string_buf_loc()); + first.consume(); + return answer; +} + +simdjson_really_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + +simdjson_really_inline value &field::value() & noexcept { + return second; +} + +simdjson_really_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_really_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { + if (error()) { return error(); } + return first.unescaped_key(); +} +simdjson_really_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/field-inl.h */ +/* begin file include/simdjson/generic/ondemand/object-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if 
(!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} + +simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_really_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. 
We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_really_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_really_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_really_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_really_inline simdjson_result object::begin() noexcept { +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_really_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result 
child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_really_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
+ iter.reset_object(); + return count; +} + +simdjson_really_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_really_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_really_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} 
+simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object-inl.h */ +/* begin file include/simdjson/generic/ondemand/parser-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_really_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#ifdef SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( 
simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have 
a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); +} + +simdjson_really_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_really_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_really_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + size_t MINIMAL_DOCUMENT_CAPACITY = 32; + if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + 
+} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/parser-inl.h */ +/* begin file include/simdjson/generic/ondemand/document_stream-inl.h */ +#include +#include +#include +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). 
+ if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_really_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_really_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_really_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_really_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_really_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. 
+ // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_really_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: 
Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_really_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_really_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} +/* end file include/simdjson/generic/ondemand/document_stream-inl.h */ +/* begin file include/simdjson/generic/ondemand/serialization-inl.h */ + + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. 
+ if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } 
+ return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw 
simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, 
simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand +/* end file include/simdjson/generic/ondemand/serialization-inl.h */ +/* end file include/simdjson/generic/ondemand-inl.h */ + + +namespace simdjson { + /** + * Represents the best statically linked simdjson implementation that can be used by the compiling + * program. + * + * Detects what options the program is compiled against, and picks the minimum implementation that + * will work on any computer that can run the program. For example, if you compile with g++ + * -march=westmere, it will pick the westmere implementation. The haswell implementation will + * still be available, and can be selected at runtime, but the builtin implementation (and any + * code that uses it) will use westmere. + */ + namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; + /** + * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand + */ + namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; + /** + * Function which returns a pointer to an implementation matching the "builtin" implementation. + * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling + * program. 
If you compile with g++ -march=haswell, this will return the haswell implementation. + * It is handy to be able to check what builtin was used: builtin_implementation()->name(). + */ + const implementation * builtin_implementation(); +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_H +/* end file include/simdjson/builtin.h */ + +#endif // SIMDJSON_H +/* end file include/simdjson.h */ diff --git a/tests/conftest.py b/tests/conftest.py index a3d14a0..c598996 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,6 +42,8 @@ def doc(parser): "int64": -1, "uint64": 18446744073709551615, "double": 1.1, + "double_zero": 0.0, + "double_whole": 1.0, "string": "test", "bool": true, "null_value": null diff --git a/tests/test_float.py b/tests/test_float.py new file mode 100644 index 0000000..ca5e427 --- /dev/null +++ b/tests/test_float.py @@ -0,0 +1,9 @@ + +def test_parser_understands_float(parser): + doc = parser.parse('1.0') + assert type(doc) is float + + +def test_mini_does_not_drop_zero(parser): + doc = parser.parse(b'[0.0, 0.5, 1.0]') + assert doc.mini == b'[0.0,0.5,1.0]' From 8477e03d9dc1edce319433d9e00048a43f8340ae Mon Sep 17 00:00:00 2001 From: Edgars Irmejs Date: Wed, 23 Nov 2022 18:59:56 +0200 Subject: [PATCH 3/3] Use the upstreamed fix of simdjson 3.0.1 --- simdjson/patches/README | 5 - simdjson/patches/apply.sh | 7 - simdjson/patches/float-aware-minify.patch | 42 - simdjson/simdjson.cpp | 3331 +- simdjson/simdjson.h | 6446 ++-- simdjson/simdjson_source/README | 7 - simdjson/simdjson_source/simdjson.cpp | 15051 --------- simdjson/simdjson_source/simdjson.h | 32135 -------------------- 8 files changed, 5073 insertions(+), 51951 deletions(-) delete mode 100644 simdjson/patches/README delete mode 100755 simdjson/patches/apply.sh delete mode 100644 simdjson/patches/float-aware-minify.patch delete mode 100644 simdjson/simdjson_source/README delete mode 100644 simdjson/simdjson_source/simdjson.cpp delete mode 100644 simdjson/simdjson_source/simdjson.h diff 
--git a/simdjson/patches/README b/simdjson/patches/README deleted file mode 100644 index 6b1f495..0000000 --- a/simdjson/patches/README +++ /dev/null @@ -1,5 +0,0 @@ -Patches to be applied to simdjson library files. See simdjson/simdjson/README - -To make a new patch file: - * Get the patch contents: for f in simdjson.h simdjson.cpp; do diff -u HEAD:simdjson/simdjson_source/$f simdjson/$f; done - * Replace in the .patch file, retaining the explanation comment at the top. Make sure patch is newline terminated. diff --git a/simdjson/patches/apply.sh b/simdjson/patches/apply.sh deleted file mode 100755 index 73d3659..0000000 --- a/simdjson/patches/apply.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -set -e - -cd "$(dirname "$0")/../.." -cp simdjson/simdjson_source/simdjson.{h,cpp} simdjson/ -patch -ruN -p0 < simdjson/patches/float-aware-minify.patch diff --git a/simdjson/patches/float-aware-minify.patch b/simdjson/patches/float-aware-minify.patch deleted file mode 100644 index 409e1f8..0000000 --- a/simdjson/patches/float-aware-minify.patch +++ /dev/null @@ -1,42 +0,0 @@ - - -simdjson minify drops the trailing '.0' from floats, which is fine by JSON spec, -but matters in practice. For example, Elasticsearch dynamic field type detection -is affected. In general, Python distinguishes between int and float, so various -type guarantees may fail. The dump/load cycle should not convert types for a few -byte gain. Let users explicitly convert types, if they need to. - -This modifies minify, so it does not drop the '.0'. - -Note: simdjson started dropping '.0' with d0821adf0e7934f27a8eb5c2fe9b8254e4. 
- - - ---- simdjson/simdjson_source/simdjson.cpp 2022-11-22 09:20:33.402000000 +0200 -+++ simdjson/simdjson.cpp 2022-11-22 09:19:27.854085130 +0200 -@@ -868,9 +868,9 @@ - - std::memset(buf + k, '0', static_cast(n) - static_cast(k)); - // Make it look like a floating-point number (#362, #378) -- // buf[n + 0] = '.'; -- // buf[n + 1] = '0'; -- return buf + (static_cast(n)); -+ buf[n + 0] = '.'; -+ buf[n + 1] = '0'; -+ return buf + (static_cast(n) + 2U); - } - - if (0 < n && n <= max_exp) { -@@ -933,10 +933,8 @@ - { - *first++ = '0'; - // Make it look like a floating-point number (#362, #378) -- if(negative) { -- *first++ = '.'; -- *first++ = '0'; -- } -+ *first++ = '.'; -+ *first++ = '0'; - return first; - } - // Compute v = buffer * 10^decimal_exponent. diff --git a/simdjson/simdjson.cpp b/simdjson/simdjson.cpp index edcec47..611411a 100644 --- a/simdjson/simdjson.cpp +++ b/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ +/* auto-generated on 2022-11-23 10:31:42 -0500. Do not edit! */ /* begin file src/simdjson.cpp */ #include "simdjson.h" @@ -870,7 +870,7 @@ inline char *format_buffer(char *buf, int len, int decimal_exponent, // Make it look like a floating-point number (#362, #378) buf[n + 0] = '.'; buf[n + 1] = '0'; - return buf + (static_cast(n) + 2U); + return buf + (static_cast(n)) + 2; } if (0 < n && n <= max_exp) { @@ -1587,7 +1587,8 @@ namespace internal { { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. 
"}, - { OUT_OF_BOUNDS, "Attempted to access location outside of document."} + { OUT_OF_BOUNDS, "Attempted to access location outside of document."}, + { TRAILING_CONTENT, "Unexpected trailing content in the JSON input."} }; // error_messages[] } // namespace internal @@ -2675,7 +2676,7 @@ class detect_best_supported_implementation_on_first_use final : public implement simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override { return set_best()->validate_utf8(buf, len); } - simdjson_really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} + simdjson_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} private: const implementation *set_best() const noexcept; }; @@ -2791,7 +2792,6 @@ simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, s simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { return get_active_implementation()->validate_utf8(buf, len); } - const implementation * builtin_implementation() { static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; assert(builtin_impl); @@ -2848,17 +2848,17 @@ namespace { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + 
simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // Functional programming causes trouble with Visual Studio. // Keeping this version in comments since it is much nicer: // auto v = in.map([&](simd8 chunk) { @@ -2912,15 +2912,15 @@ simdjson_really_inline json_character_block json_character_block::classify(const return { whitespace, op }; } -simdjson_really_inline bool is_ascii(const simd8x64& input) { +simdjson_inline bool is_ascii(const simd8x64& input) { simd8 bits = input.reduce_or(); - return bits.max_val() < 0b10000000u; + return bits.max_val() < 0x80u; } -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0xc0u); + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. // This will work fine because we only have to report errors for cases with 0-1 lead bytes. 
// Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is @@ -2929,9 +2929,9 @@ simdjson_unused simdjson_really_inline simd8 must_be_continuation(const si return is_second_byte ^ is_third_byte ^ is_fourth_byte; } -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); return is_third_byte ^ is_fourth_byte; } @@ -2947,7 +2947,7 @@ namespace utf8_validation { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -3037,7 +3037,7 @@ using namespace simd; ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -3050,7 +3050,7 @@ using namespace simd; // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -3062,14 +3062,14 @@ using namespace simd; 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #endif const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); @@ -3087,7 +3087,7 @@ using namespace simd; // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -3098,13 +3098,21 @@ using namespace simd; // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if(simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } else { @@ -3113,12 +3121,12 @@ using namespace simd; ||(simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); this->check_utf8_bytes(input.chunks[2], input.chunks[1]); @@ -3129,7 +3137,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -3157,10 +3165,10 @@ namespace { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** * Get the last block, padded with spaces. * @@ -3170,8 +3178,8 @@ struct buf_block_reader { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: const uint8_t *buf; const size_t len; @@ -3210,23 +3218,23 @@ simdjson_unused static char * format_mask(uint64_t mask) { } template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } +simdjson_inline size_t buf_block_reader::block_index() { return idx; } template -simdjson_really_inline bool buf_block_reader::has_full_block() const { +simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { +simdjson_inline const uint8_t *buf_block_reader::full_block() const { return &buf[idx]; } template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -3234,7 +3242,7 @@ simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t } template -simdjson_really_inline void buf_block_reader::advance() { +simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -3250,27 +3258,27 @@ namespace stage1 { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -3285,14 +3293,14 @@ struct json_string_block { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -3327,7 +3335,7 @@ class json_string_scanner { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -3352,7 +3360,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uin // // Backslash sequences outside of quotes will be detected in stage 2. 
// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -3385,7 +3393,7 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s ); } -simdjson_really_inline error_code json_string_scanner::finish() { +simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -3423,25 +3431,25 @@ namespace stage1 { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -3456,12 +3464,12 @@ struct json_block { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -3472,7 +3480,7 @@ struct json_block { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -3496,10 +3504,10 @@ struct json_block { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -3516,13 +3524,13 @@ class json_scanner { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -3547,7 +3555,7 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64 - simdjson_really_inline void step(const 
uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -3597,7 +3605,7 @@ simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size } template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf+64); json_block block_1 = scanner.next(in_1); @@ -3608,7 +3616,7 @@ simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, b } template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -3681,7 +3689,7 
@@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -3756,7 +3764,7 @@ class bit_indexer { public: uint32_t *tail; - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -3767,9 +3775,9 @@ class bit_indexer { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -3878,11 +3886,11 @@ class json_structural_indexer { static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -3891,26 +3899,26 @@ class json_structural_indexer { uint64_t unescaped_chars_error = 0; }; -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} // Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left return len; } @@ -3959,7 +3967,7 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa } template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block+64); json_block block_1 = scanner.next(in_1); @@ -3970,14 +3978,14 @@ simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *bl } template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { +simdjson_inline void 
json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); checker.check_next_input(in); indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser @@ -3985,7 +3993,7 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + 
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -4157,7 +4310,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -4166,7 +4319,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -4175,7 +4328,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -4183,7 +4336,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -4261,14 +4414,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -4277,7 +4430,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -4285,56 +4438,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -4459,52 +4612,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -4520,7 +4673,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -4552,18 +4705,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -4571,24 +4724,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -4596,50 +4749,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -4656,40 +4809,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -4698,22 +4851,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -4721,18 +4874,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -4741,56 +4894,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -4802,16 +4955,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -4833,42 +4986,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; 
} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -4877,24 +5030,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -4907,13 +5060,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) // But only add the overflow check when the document itself exceeds 4GB @@ -4939,7 +5092,7 @@ namespace arm64 { namespace { namespace stage1 { -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no // benefit and therefore makes things worse. 
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } @@ -4971,6 +5124,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return arm64::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } @@ -5053,7 +5210,7 @@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -5127,7 +5284,7 @@ namespace stage1 { class structural_scanner { public: -simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) +simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) : buf{_parser.buf}, next_structural_index{_parser.structural_indexes.get()}, parser{_parser}, @@ -5135,18 +5292,18 @@ simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, st partial{_partial} { } -simdjson_really_inline void add_structural() { +simdjson_inline void add_structural() { *next_structural_index = idx; next_structural_index++; } -simdjson_really_inline bool is_continuation(uint8_t c) { - return (c & 0b11000000) == 0b10000000; +simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; } -simdjson_really_inline void validate_utf8_character() { +simdjson_inline void 
validate_utf8_character() { // Continuation - if (simdjson_unlikely((buf[idx] & 0b01000000) == 0)) { + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { // extra continuation error = UTF8_ERROR; idx++; @@ -5154,7 +5311,7 @@ simdjson_really_inline void validate_utf8_character() { } // 2-byte - if ((buf[idx] & 0b00100000) == 0) { + if ((buf[idx] & 0x20) == 0) { // missing continuation if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { if (idx+1 > len && is_streaming(partial)) { idx = len; return; } @@ -5163,13 +5320,13 @@ simdjson_really_inline void validate_utf8_character() { return; } // overlong: 1100000_ 10______ - if (buf[idx] <= 0b11000001) { error = UTF8_ERROR; } + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } idx += 2; return; } // 3-byte - if ((buf[idx] & 0b00010000) == 0) { + if ((buf[idx] & 0x10) == 0) { // missing continuation if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { if (idx+2 > len && is_streaming(partial)) { idx = len; return; } @@ -5178,9 +5335,9 @@ simdjson_really_inline void validate_utf8_character() { return; } // overlong: 11100000 100_____ ________ - if (buf[idx] == 0b11100000 && buf[idx+1] <= 0b10011111) { error = UTF8_ERROR; } + if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } // surrogates: U+D800-U+DFFF 11101101 101_____ - if (buf[idx] == 0b11101101 && buf[idx+1] >= 0b10100000) { error = UTF8_ERROR; } + if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } idx += 3; return; } @@ -5194,24 +5351,24 @@ simdjson_really_inline void validate_utf8_character() { return; } // overlong: 11110000 1000____ ________ ________ - if (buf[idx] == 0b11110000 && buf[idx+1] <= 0b10001111) { error = UTF8_ERROR; } + if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } // too large: > U+10FFFF: // 11110100 (1001|101_)____ // 1111(1___|011_|0101) 10______ // also includes 5, 6, 7 and 8 byte characters: // 11111___ - if (buf[idx] == 0b11110100 
&& buf[idx+1] >= 0b10010000) { error = UTF8_ERROR; } - if (buf[idx] >= 0b11110101) { error = UTF8_ERROR; } + if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } idx += 4; } // Returns true if the string is unclosed. -simdjson_really_inline bool validate_string() { +simdjson_inline bool validate_string() { idx++; // skip first quote while (idx < len && buf[idx] != '"') { if (buf[idx] == '\\') { idx += 2; - } else if (simdjson_unlikely(buf[idx] & 0b10000000)) { + } else if (simdjson_unlikely(buf[idx] & 0x80)) { validate_utf8_character(); } else { if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } @@ -5222,7 +5379,7 @@ simdjson_really_inline bool validate_string() { return false; } -simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) { +simdjson_inline bool is_whitespace_or_operator(uint8_t c) { switch (c) { case '{': case '}': case '[': case ']': case ',': case ':': case ' ': case '\r': case '\n': case '\t': @@ -5235,7 +5392,7 @@ simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) { // // Parse the entire input in STEP_SIZE-byte chunks. 
// -simdjson_really_inline error_code scan() { +simdjson_inline error_code scan() { bool unclosed_string = false; for (;idx len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); if (code_point < 0x80 || 0x7ff < code_point) { return false; } - } else if ((byte & 0b11110000) == 0b11100000) { + } else if ((byte & 0xf0) == 0xe0) { next_pos = pos + 3; if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); if (code_point < 0x800 || 0xffff < code_point || (0xd7ff < code_point && code_point < 0xe000)) { return false; } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 + } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 next_pos = pos + 4; if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } // range check code_point = - (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) 
<< 6 | (data[pos + 3] & 0x3f); if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } } else { // we may have a continuation @@ -5460,6 +5617,151 @@ simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t // // Stage 2 // +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -5485,7 +5787,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -5494,7 +5796,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -5503,7 +5805,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -5511,7 +5813,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -5589,14 +5891,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -5605,7 +5907,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -5613,56 +5915,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -5787,52 +6089,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -5848,7 +6150,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -5880,18 +6182,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -5899,24 +6201,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -5924,50 +6226,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -5984,40 +6286,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -6026,22 +6328,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -6049,18 +6351,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -6069,56 +6371,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -6130,16 +6432,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -6161,42 +6463,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; 
} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -6205,24 +6507,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -6235,13 +6537,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -6270,6 +6572,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return fallback::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } @@ -6334,25 +6640,25 @@ namespace { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; + simdjson_inline uint64_t whitespace() const noexcept; // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; + simdjson_inline uint64_t op() const noexcept; // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() const noexcept; + simdjson_inline uint64_t scalar() const noexcept; uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) }; -simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } -simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } -simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } +simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } 
+simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } // This identifies structural characters (comma, colon, braces, brackets), // and ASCII white-space ('\r','\n','\t',' '). -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -6402,21 +6708,21 @@ simdjson_really_inline json_character_block json_character_block::classify(const return { whitespace, op }; } -simdjson_really_inline bool is_ascii(const simd8x64& input) { +simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -6433,7 +6739,7 @@ namespace utf8_validation { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -6523,7 +6829,7 @@ using namespace simd; ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -6536,7 +6842,7 @@ using namespace simd; // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -6548,14 +6854,14 @@ using namespace simd; 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #endif const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); @@ -6573,7 +6879,7 @@ using namespace simd; // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -6584,13 +6890,21 @@ using namespace simd; // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if(simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } else { @@ -6599,12 +6913,12 @@ using namespace simd; ||(simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); this->check_utf8_bytes(input.chunks[2], input.chunks[1]); @@ -6615,7 +6929,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -6645,10 +6959,10 @@ namespace { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** * Get the last block, padded with spaces. * @@ -6658,8 +6972,8 @@ struct buf_block_reader { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: const uint8_t *buf; const size_t len; @@ -6698,23 +7012,23 @@ simdjson_unused static char * format_mask(uint64_t mask) { } template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } +simdjson_inline size_t buf_block_reader::block_index() { return idx; } template -simdjson_really_inline bool buf_block_reader::has_full_block() const { +simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { +simdjson_inline const uint8_t *buf_block_reader::full_block() const { return &buf[idx]; } template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -6722,7 +7036,7 @@ simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t } template -simdjson_really_inline void buf_block_reader::advance() { +simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -6738,27 +7052,27 @@ namespace stage1 { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -6773,14 +7087,14 @@ struct json_string_block { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -6815,7 +7129,7 @@ class json_string_scanner { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -6840,7 +7154,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uin // // Backslash sequences outside of quotes will be detected in stage 2. 
// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -6873,7 +7187,7 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s ); } -simdjson_really_inline error_code json_string_scanner::finish() { +simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -6911,25 +7225,25 @@ namespace stage1 { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -6944,12 +7258,12 @@ struct json_block { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -6960,7 +7274,7 @@ struct json_block { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -6984,10 +7298,10 @@ struct json_block { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -7004,13 +7318,13 @@ class json_scanner { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -7035,7 +7349,7 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64 - simdjson_really_inline void step(const 
uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -7085,7 +7399,7 @@ simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size } template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf+64); json_block block_1 = scanner.next(in_1); @@ -7096,7 +7410,7 @@ simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, b } template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -7169,7 +7483,7 
@@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -7244,7 +7558,7 @@ class bit_indexer { public: uint32_t *tail; - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -7255,9 +7569,9 @@ class bit_indexer { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -7366,11 +7680,11 @@ class json_structural_indexer { static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -7379,26 +7693,26 @@ class json_structural_indexer { uint64_t unescaped_chars_error = 0; }; -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} // Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left return len; } @@ -7447,7 +7761,7 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa } template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block+64); json_block block_1 = scanner.next(in_1); @@ -7458,14 +7772,14 @@ simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *bl } template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { +simdjson_inline void 
json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); checker.check_next_input(in); indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser @@ -7473,7 +7787,7 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + 
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -7691,7 +8150,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -7700,7 +8159,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -7709,7 +8168,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -7717,7 +8176,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -7795,14 +8254,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -7811,7 +8270,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -7819,56 +8278,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -7993,52 +8452,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -8054,7 +8513,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -8086,18 +8545,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -8105,24 +8564,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -8130,50 +8589,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -8190,40 +8649,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -8232,22 +8691,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -8255,18 +8714,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) 
noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -8275,56 +8734,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -8336,16 +8795,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -8367,42 +8826,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; 
} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -8411,24 +8870,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -8441,13 +8900,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -8473,7 +8932,7 @@ namespace icelake { namespace { namespace stage1 { -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -8503,6 +8962,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return icelake::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } @@ -8568,25 +9031,25 @@ namespace { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; + simdjson_inline uint64_t whitespace() const noexcept; // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; + simdjson_inline uint64_t op() const noexcept; // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() const noexcept; + simdjson_inline uint64_t scalar() const noexcept; uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) }; -simdjson_really_inline uint64_t 
json_character_block::whitespace() const noexcept { return _whitespace; } -simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } -simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } +simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } +simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } // This identifies structural characters (comma, colon, braces, brackets), // and ASCII white-space ('\r','\n','\t',' '). -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. 
const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -8639,21 +9102,21 @@ simdjson_really_inline json_character_block json_character_block::classify(const return { whitespace, op }; } -simdjson_really_inline bool is_ascii(const simd8x64& input) { +simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -8670,7 +9133,7 @@ namespace utf8_validation { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -8760,7 +9223,7 @@ using namespace simd; ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -8773,7 +9236,7 @@ using namespace simd; // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -8785,14 +9248,14 @@ using namespace simd; 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #endif const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); @@ -8810,7 +9273,7 @@ using namespace simd; // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -8821,13 +9284,21 @@ using namespace simd; // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if(simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } else { @@ -8836,12 +9307,12 @@ using namespace simd; ||(simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); this->check_utf8_bytes(input.chunks[2], input.chunks[1]); @@ -8852,7 +9323,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -8880,10 +9351,10 @@ namespace { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** * Get the last block, padded with spaces. * @@ -8893,8 +9364,8 @@ struct buf_block_reader { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: const uint8_t *buf; const size_t len; @@ -8933,23 +9404,23 @@ simdjson_unused static char * format_mask(uint64_t mask) { } template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } +simdjson_inline size_t buf_block_reader::block_index() { return idx; } template -simdjson_really_inline bool buf_block_reader::has_full_block() const { +simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { +simdjson_inline const uint8_t *buf_block_reader::full_block() const { return &buf[idx]; } template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -8957,7 +9428,7 @@ simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t } template -simdjson_really_inline void buf_block_reader::advance() { +simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -8973,27 +9444,27 @@ namespace stage1 { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -9008,14 +9479,14 @@ struct json_string_block { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -9050,7 +9521,7 @@ class json_string_scanner { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -9075,7 +9546,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uin // // Backslash sequences outside of quotes will be detected in stage 2. 
// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -9108,7 +9579,7 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s ); } -simdjson_really_inline error_code json_string_scanner::finish() { +simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -9146,25 +9617,25 @@ namespace stage1 { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -9179,12 +9650,12 @@ struct json_block { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -9195,7 +9666,7 @@ struct json_block { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -9219,10 +9690,10 @@ struct json_block { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -9239,13 +9710,13 @@ class json_scanner { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -9270,7 +9741,7 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64 - simdjson_really_inline void step(const 
uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -9320,7 +9791,7 @@ simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size } template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf+64); json_block block_1 = scanner.next(in_1); @@ -9331,7 +9802,7 @@ simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, b } template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ -9404,7 +9875,7 
@@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -9479,7 +9950,7 @@ class bit_indexer { public: uint32_t *tail; - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -9490,9 +9961,9 @@ class bit_indexer { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -9601,11 +10072,11 @@ class json_structural_indexer { static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -9614,26 +10085,26 @@ class json_structural_indexer { uint64_t unescaped_chars_error = 0; }; -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} // Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left return len; } @@ -9682,7 +10153,7 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa } template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block+64); json_block block_1 = scanner.next(in_1); @@ -9693,14 +10164,14 @@ simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *bl } template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { +simdjson_inline void 
json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); checker.check_next_input(in); indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser @@ -9708,7 +10179,7 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + 
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -9879,7 +10495,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -9888,7 +10504,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -9897,7 +10513,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -9905,7 +10521,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -9983,14 +10599,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -9999,7 +10615,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -10007,56 +10623,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -10181,52 +10797,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -10242,7 +10858,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -10274,18 +10890,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -10293,24 +10909,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -10318,50 +10934,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -10378,40 +10994,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -10420,22 +11036,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -10443,18 +11059,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t 
*dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -10463,56 +11079,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -10524,16 +11140,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -10555,42 +11171,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return 
SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -10599,24 +11215,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -10629,13 +11245,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -10661,7 +11277,7 @@ namespace haswell { namespace { namespace stage1 { -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -10691,6 +11307,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return haswell::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } @@ -10751,17 +11371,17 @@ namespace { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { const 
simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); @@ -10789,22 +11409,22 @@ simdjson_really_inline json_character_block json_character_block::classify(const return { whitespace, op }; } -simdjson_really_inline bool is_ascii(const simd8x64& input) { +simdjson_inline bool is_ascii(const simd8x64& input) { // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); + return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); } -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -10821,7 +11441,7 @@ namespace utf8_validation { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -10911,7 +11531,7 @@ using namespace simd; ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -10924,7 +11544,7 @@ using namespace simd; // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -10936,14 +11556,14 @@ using namespace simd; 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #endif const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); @@ -10961,7 +11581,7 @@ using namespace simd; // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -10972,13 +11592,21 @@ using namespace simd; // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if(simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } else { @@ -10987,12 +11615,12 @@ using namespace simd; ||(simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); this->check_utf8_bytes(input.chunks[2], input.chunks[1]); @@ -11003,7 +11631,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -11031,10 +11659,10 @@ namespace { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** * Get the last block, padded with spaces. * @@ -11044,8 +11672,8 @@ struct buf_block_reader { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: const uint8_t *buf; const size_t len; @@ -11084,23 +11712,23 @@ simdjson_unused static char * format_mask(uint64_t mask) { } template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } +simdjson_inline size_t buf_block_reader::block_index() { return idx; } template -simdjson_really_inline bool buf_block_reader::has_full_block() const { +simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { +simdjson_inline const uint8_t *buf_block_reader::full_block() const { return &buf[idx]; } template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -11108,7 +11736,7 @@ simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t } template -simdjson_really_inline void buf_block_reader::advance() { +simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -11124,27 +11752,27 @@ namespace stage1 { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -11159,14 +11787,14 @@ struct json_string_block { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -11201,7 +11829,7 @@ class json_string_scanner { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -11226,7 +11854,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uin // // Backslash sequences outside of quotes will be detected in stage 2. 
// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -11259,7 +11887,7 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s ); } -simdjson_really_inline error_code json_string_scanner::finish() { +simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -11297,25 +11925,25 @@ namespace stage1 { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -11330,12 +11958,12 @@ struct json_block { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -11346,7 +11974,7 @@ struct json_block { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -11370,10 +11998,10 @@ struct json_block { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -11390,13 +12018,13 @@ class json_scanner { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -11421,7 +12049,7 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64 - simdjson_really_inline void 
step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -11471,7 +12099,7 @@ simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size } template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf+64); json_block block_1 = scanner.next(in_1); @@ -11482,7 +12110,7 @@ simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, b } template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ 
-11555,7 +12183,7 @@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -11630,7 +12258,7 @@ class bit_indexer { public: uint32_t *tail; - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -11641,9 +12269,9 @@ class bit_indexer { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -11752,11 +12380,11 @@ class json_structural_indexer { static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -11765,26 +12393,26 @@ class json_structural_indexer { uint64_t unescaped_chars_error = 0; }; -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} // Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left return len; } @@ -11833,7 +12461,7 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa } template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block+64); json_block block_1 = scanner.next(in_1); @@ -11844,14 +12472,14 @@ simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *bl } template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { +simdjson_inline void 
json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); checker.check_next_input(in); indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser @@ -11859,7 +12487,7 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + 
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -12031,7 +12803,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -12040,7 +12812,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -12049,7 +12821,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -12057,7 +12829,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -12135,14 +12907,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -12151,7 +12923,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -12159,56 +12931,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -12333,52 +13105,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -12394,7 +13166,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -12426,18 +13198,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -12445,24 +13217,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -12470,50 +13242,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -12530,40 +13302,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -12572,22 +13344,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -12595,18 +13367,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t 
*dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -12615,56 +13387,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -12676,16 +13448,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -12707,42 +13479,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return 
SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -12751,24 +13523,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -12781,13 +13553,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) // But only add the overflow check when the document itself exceeds 4GB @@ -12813,7 +13585,7 @@ namespace ppc64 { namespace { namespace stage1 { -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no // benefit and therefore makes things worse. 
// if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } @@ -12845,6 +13617,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return ppc64::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } @@ -12908,17 +13684,17 @@ namespace { using namespace simd; struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); + static simdjson_inline json_character_block classify(const simd::simd8x64& in); - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } uint64_t _whitespace; uint64_t _op; }; -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why // we can't use the generic lookup_16. 
auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); @@ -12977,21 +13753,21 @@ simdjson_really_inline json_character_block json_character_block::classify(const return { whitespace, op }; } -simdjson_really_inline bool is_ascii(const simd8x64& input) { +simdjson_inline bool is_ascii(const simd8x64& input) { return input.reduce_or().is_ascii(); } -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); } -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 // Caller requires a bool (all 1's). 
All values resulting from the subtraction will be <= 64, so signed comparison is fine. return simd8(is_third_byte | is_fourth_byte) > int8_t(0); } @@ -13008,7 +13784,7 @@ namespace utf8_validation { using namespace simd; - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) // Bit 1 = Too Long (ASCII followed by continuation) // Bit 2 = Overlong 3-byte @@ -13098,7 +13874,7 @@ using namespace simd; ); return (byte_1_high & byte_1_low & byte_2_high); } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, const simd8 prev_input, const simd8 sc) { simd8 prev2 = input.prev<2>(prev_input); simd8 prev3 = input.prev<3>(prev_input); @@ -13111,7 +13887,7 @@ using namespace simd; // Return nonzero if there are incomplete multibyte characters at the end of the block: // e.g. if there is a 4-byte character, but it's 3 bytes from the end. // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { + simdjson_inline simd8 is_incomplete(const simd8 input) { // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): // ... 
1111____ 111_____ 11______ #if SIMDJSON_IMPLEMENTATION_ICELAKE @@ -13123,14 +13899,14 @@ using namespace simd; 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #else static const uint8_t max_array[32] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 }; #endif const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); @@ -13148,7 +13924,7 @@ using namespace simd; // // Check whether the current bytes are valid UTF-8. // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) simd8 prev1 = input.prev<1>(prev_input); @@ -13159,13 +13935,21 @@ using namespace simd; // The only problem that can happen at EOF is that a multibyte character is too short // or a byte value too large in the last bytes: check_special_cases only checks for bytes // too large in the first of two bytes. - simdjson_really_inline void check_eof() { + simdjson_inline void check_eof() { // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't // possibly finish them. 
this->error |= this->prev_incomplete; } - simdjson_really_inline void check_next_input(const simd8x64& input) { +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { if(simdjson_likely(is_ascii(input))) { this->error |= this->prev_incomplete; } else { @@ -13174,12 +13958,12 @@ using namespace simd; ||(simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4), "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { this->check_utf8_bytes(input.chunks[0], this->prev_input_block); this->check_utf8_bytes(input.chunks[1], input.chunks[0]); this->check_utf8_bytes(input.chunks[2], input.chunks[1]); @@ -13190,7 +13974,7 @@ using namespace simd; } } // do not forget to call check_eof! - simdjson_really_inline error_code errors() { + simdjson_inline error_code errors() { return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; } @@ -13218,10 +14002,10 @@ namespace { template struct buf_block_reader { public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; /** * Get the last block, padded with spaces. * @@ -13231,8 +14015,8 @@ struct buf_block_reader { * * @return the number of effective characters in the last block. */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); private: const uint8_t *buf; const size_t len; @@ -13271,23 +14055,23 @@ simdjson_unused static char * format_mask(uint64_t mask) { } template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } +simdjson_inline size_t buf_block_reader::block_index() { return idx; } template -simdjson_really_inline bool buf_block_reader::has_full_block() const { +simdjson_inline bool buf_block_reader::has_full_block() const { return idx < lenminusstep; } template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { +simdjson_inline const uint8_t *buf_block_reader::full_block() const { return &buf[idx]; } template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. std::memcpy(dst, buf + idx, len - idx); @@ -13295,7 +14079,7 @@ simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t } template -simdjson_really_inline void buf_block_reader::advance() { +simdjson_inline void buf_block_reader::advance() { idx += STEP_SIZE; } @@ -13311,27 +14095,27 @@ namespace stage1 { struct json_string_block { // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } + simdjson_inline uint64_t escaped() const { return _escaped; } // Escape characters (backslashes that are not escaped--i.e. 
in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } + simdjson_inline uint64_t quote() const { return _quote; } // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } // Tail of string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + simdjson_inline uint64_t string_tail() const { return _in_string ^ _quote; } // backslash characters uint64_t _backslash; @@ -13346,14 +14130,14 @@ struct json_string_block { // Scans blocks for string characters, storing the state necessary to do so class 
json_string_scanner { public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); + simdjson_inline json_string_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). uint64_t prev_in_string = 0ULL; @@ -13388,7 +14172,7 @@ class json_string_scanner { // desired | x | x x x x x x x x | // text | \\\ | \\\"\\\" \\\" \\"\\" | // -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { // If there was overflow, pretend the first character isn't a backslash backslash &= ~prev_escaped; uint64_t follows_escape = backslash << 1 | prev_escaped; @@ -13413,7 +14197,7 @@ simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uin // // Backslash sequences outside of quotes will be detected in stage 2. 
// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { const uint64_t backslash = in.eq('\\'); const uint64_t escaped = find_escaped(backslash); const uint64_t quote = in.eq('"') & ~escaped; @@ -13446,7 +14230,7 @@ simdjson_really_inline json_string_block json_string_scanner::next(const simd::s ); } -simdjson_really_inline error_code json_string_scanner::finish() { +simdjson_inline error_code json_string_scanner::finish() { if (prev_in_string) { return UNCLOSED_STRING; } @@ -13484,25 +14268,25 @@ namespace stage1 { struct json_block { public: // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} /** * The start of structurals. * In simdjson prior to v0.3, these were called the pseudo-structural characters. **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } // Helpers /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } // string and escape characters json_string_block _string; @@ -13517,12 +14301,12 @@ struct json_block { * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". * They may reside inside a string. **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } /** * The start of non-operator runs, like 123, true and "abc". * It main reside inside a string. **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { + simdjson_inline uint64_t potential_scalar_start() const noexcept { // The term "scalar" refers to anything except structural characters and white space // (so letters, numbers, quotes). 
// Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space @@ -13533,7 +14317,7 @@ struct json_block { * Whether the given character is immediately after a non-operator like 123, true. * The characters following a quote are not included. */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { + simdjson_inline uint64_t follows_potential_scalar() const noexcept { // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a // white space. @@ -13557,10 +14341,10 @@ struct json_block { */ class json_scanner { public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); + simdjson_inline error_code finish(); private: // Whether the last character of the previous iteration is part of a scalar token @@ -13577,13 +14361,13 @@ class json_scanner { // // const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); // -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { const uint64_t result = match << 1 | overflow; overflow = match >> 63; return result; } -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { json_string_block strings = string_scanner.next(in); // identifies the white-space and the structural characters json_character_block characters = json_character_block::classify(in); @@ -13608,7 +14392,7 @@ simdjson_really_inline json_block json_scanner::next(const simd::simd8x64 - simdjson_really_inline void 
step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); json_scanner scanner{}; uint8_t *dst; }; -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { uint64_t mask = block.whitespace(); dst += in.compress(mask, dst); } -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { error_code error = scanner.finish(); if (error) { dst_len = 0; return error; } dst_len = dst - dst_start; @@ -13658,7 +14442,7 @@ simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size } template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block_buf); simd::simd8x64 in_2(block_buf+64); json_block block_1 = scanner.next(in_1); @@ -13669,7 +14453,7 @@ simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, b } template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block_buf); json_block block_1 = scanner.next(in_1); this->next(block_buf, block_1); @@ 
-13742,7 +14526,7 @@ namespace { * complete document, therefore the last json buffer location is the end of the * batch. */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { // Variant: do not count separately, just figure out depth if(parser.n_structural_indexes == 0) { return 0; } auto arr_cnt = 0; @@ -13817,7 +14601,7 @@ class bit_indexer { public: uint32_t *tail; - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} // flatten out values in 'bits' assuming that they are are to have values of idx // plus their position in the bitvector, and store these indexes at @@ -13828,9 +14612,9 @@ class bit_indexer { // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own // version of the code. #ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); + simdjson_inline void write(uint32_t idx, uint64_t bits); #else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { + simdjson_inline void write(uint32_t idx, uint64_t bits) { // In some instances, the next branch is expensive because it is mispredicted. // Unfortunately, in other cases, // it helps tremendously. 
@@ -13939,11 +14723,11 @@ class json_structural_indexer { static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); json_scanner scanner{}; utf8_checker checker{}; @@ -13952,26 +14736,26 @@ class json_structural_indexer { uint64_t unescaped_chars_error = 0; }; -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} // Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { if (simdjson_unlikely(len < 3)) { switch (len) { case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte 
characters with only 2 bytes left return len; case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left return len; case 0: return len; } } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left return len; } @@ -14020,7 +14804,7 @@ error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_pa } template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { simd::simd8x64 in_1(block); simd::simd8x64 in_2(block+64); json_block block_1 = scanner.next(in_1); @@ -14031,14 +14815,14 @@ simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *bl } template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { simd::simd8x64 in_1(block); json_block block_1 = scanner.next(in_1); this->next(in_1, block_1, reader.block_index()); reader.advance(); } -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { +simdjson_inline void 
json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { uint64_t unescaped = in.lteq(0x1F); checker.check_next_input(in); indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser @@ -14046,7 +14830,7 @@ simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64 backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + 
uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ /* begin file src/generic/stage2/tape_builder.h */ /* begin file src/generic/stage2/json_iterator.h */ /* begin file src/generic/stage2/logger.h */ @@ -14217,7 +15146,7 @@ namespace logger { static int log_depth; // Not threadsafe. Log only. 
// Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { + static simdjson_inline char printable_char(char c) { if (c >= 0x20) { return c; } else { @@ -14226,7 +15155,7 @@ namespace logger { } // Print the header and set up log_start - static simdjson_really_inline void log_start() { + static simdjson_inline void log_start() { if (LOG_ENABLED) { log_depth = 0; printf("\n"); @@ -14235,7 +15164,7 @@ namespace logger { } } - simdjson_unused static simdjson_really_inline void log_string(const char *message) { + simdjson_unused static simdjson_inline void log_string(const char *message) { if (LOG_ENABLED) { printf("%s\n", message); } @@ -14243,7 +15172,7 @@ namespace logger { // Logs a single line from the stage 2 DOM parser template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { if (LOG_ENABLED) { printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; @@ -14321,14 +15250,14 @@ class json_iterator { * - increment_count(iter) - each time a value is found in an array or object. */ template - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; /** * Create an iterator capable of walking a JSON document. * * The document must have already passed through stage 1. */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); /** * Look at the next token. 
@@ -14337,7 +15266,7 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *peek() const noexcept; + simdjson_inline const uint8_t *peek() const noexcept; /** * Advance to the next token. * @@ -14345,56 +15274,56 @@ class json_iterator { * * They may include invalid JSON as well (such as `1.2.3` or `ture`). */ - simdjson_really_inline const uint8_t *advance() noexcept; + simdjson_inline const uint8_t *advance() noexcept; /** * Get the remaining length of the document, from the start of the current token. */ - simdjson_really_inline size_t remaining_len() const noexcept; + simdjson_inline size_t remaining_len() const noexcept; /** * Check if we are at the end of the document. * * If this is true, there are no more tokens. */ - simdjson_really_inline bool at_eof() const noexcept; + simdjson_inline bool at_eof() const noexcept; /** * Check if we are at the beginning of the document. */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; /** * Log that a value has been found. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_value(const char *type) const noexcept; + simdjson_inline void log_value(const char *type) const noexcept; /** * Log the start of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; + simdjson_inline void log_start_value(const char *type) const noexcept; /** * Log the end of a multipart value. * * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; + simdjson_inline void log_end_value(const char *type) const noexcept; /** * Log an error. 
* * Set LOG_ENABLED=true in logger.h to see logging. */ - simdjson_really_inline void log_error(const char *error) const noexcept; + simdjson_inline void log_error(const char *error) const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; }; template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { logger::log_start(); // @@ -14519,52 +15448,52 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_docum } // walk_document() -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) : buf{_dom_parser.buf}, next_structural{&_dom_parser.structural_indexes[start_structural_index]}, dom_parser{_dom_parser} { } -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { return &buf[*(next_structural)]; } -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { +simdjson_inline const uint8_t *json_iterator::advance() noexcept { return &buf[*(next_structural++)]; } -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { +simdjson_inline size_t json_iterator::remaining_len() const noexcept { return dom_parser.len - *(next_structural-1); } 
-simdjson_really_inline bool json_iterator::at_eof() const noexcept { +simdjson_inline bool json_iterator::at_eof() const noexcept { return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; } -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { +simdjson_inline bool json_iterator::at_beginning() const noexcept { return next_structural == dom_parser.structural_indexes.get(); } -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; } -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { logger::log_line(*this, "", type, ""); } -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { logger::log_line(*this, "+", type, ""); if (logger::LOG_ENABLED) { logger::log_depth++; } } -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { if (logger::LOG_ENABLED) { logger::log_depth--; } logger::log_line(*this, "-", type, ""); } -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { logger::log_line(*this, "", "ERROR", error); } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_root_string(*this, 
value); case 't': return visitor.visit_root_true_atom(*this, value); @@ -14580,7 +15509,7 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root } } template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { switch (*value) { case '"': return visitor.visit_string(*this, value); case 't': return visitor.visit_true_atom(*this, value); @@ -14612,18 +15541,18 @@ struct tape_writer { uint64_t *next_tape_loc; /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** * Append a tape entry (an 8-bit type,and 56 bits worth of value). */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; /** * Skip the current tape entry without writing. @@ -14631,24 +15560,24 @@ struct tape_writer { * Used to skip the start of the container, since we'll come back later to fill it in when the * container ends. */ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Skip the number of tape entries necessary to write a large u64 or i64. */ - simdjson_really_inline void skip_large_integer() noexcept; + simdjson_inline void skip_large_integer() noexcept; /** * Skip the number of tape entries necessary to write a double. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * Write a value to a known location on tape. * * Used to go back and write out the start of a container after the container ends. */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; private: /** @@ -14656,50 +15585,50 @@ struct tape_writer { * all 64 bits, such as double and uint64_t. */ template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; }; // struct number_writer -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { append2(0, value, internal::tape_type::INT64); } -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { append(0, internal::tape_type::UINT64); *next_tape_loc = value; next_tape_loc++; } /** Write a double value to tape. 
*/ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { +simdjson_inline void tape_writer::append_double(double value) noexcept { append2(0, value, internal::tape_type::DOUBLE); } -simdjson_really_inline void tape_writer::skip() noexcept { +simdjson_inline void tape_writer::skip() noexcept { next_tape_loc++; } -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { +simdjson_inline void tape_writer::skip_large_integer() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::skip_double() noexcept { +simdjson_inline void tape_writer::skip_double() noexcept { next_tape_loc += 2; } -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { *next_tape_loc = val | ((uint64_t(char(t))) << 56); next_tape_loc++; } template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { append(val, t); static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); memcpy(next_tape_loc, &val2, sizeof(val2)); next_tape_loc++; } -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { tape_loc = val | ((uint64_t(char(t))) << 56); } @@ -14716,40 +15645,40 @@ namespace stage2 { struct tape_builder { template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( + simdjson_warn_unused static simdjson_inline error_code parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept; /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; /** * Called when a key in a field is encountered. * * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array * will be called after this with the field value. */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; /** Called when a non-empty object ends. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; /** * Called when a string, number, boolean or null is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; /** * Called when a string, number, boolean or null is found at the top level of a document (i.e. * when there is no array or object and the entire document is a single string, number, boolean or @@ -14758,22 +15687,22 @@ struct tape_builder { * This is separate from primitive() because simdjson's normal primitive parsing routines assume * there is at least one more token after the value, which is only true in an array or object. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, 
const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; /** Called each time a new field or element in an array or object is found. */ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; /** Next location to write to tape */ tape_writer tape; @@ -14781,18 +15710,18 @@ struct tape_builder { /** Next write location in the string buf for stage 2 parsing */ uint8_t *current_string_buf_loc; - simdjson_really_inline tape_builder(dom::document &doc) noexcept; + simdjson_inline tape_builder(dom::document &doc) noexcept; - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t 
*dst) noexcept; + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; }; // class tape_builder template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( dom_parser_implementation &dom_parser, dom::document &doc) noexcept { dom_parser.doc = &doc; @@ -14801,56 +15730,56 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_docum return iter.walk_document(builder); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_root_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { return iter.visit_primitive(*this, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_OBJECT, 
internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { start_container(iter); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { +simdjson_warn_unused 
simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { constexpr uint32_t start_tape_index = 0; tape.append(start_tape_index, internal::tape_type::ROOT); tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { return visit_string(iter, key, true); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 return SUCCESS; } -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { iter.log_value(key ? 
"key" : "string"); uint8_t *dst = on_start_string(iter); dst = stringparsing::parse_string(value+1, dst); @@ -14862,16 +15791,16 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_strin return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { return visit_string(iter, value); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("number"); return numberparsing::parse_number(value, tape); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { // // We need to make a copy to make sure that the string is space terminated. 
// This is not about padding the input, which should already padded up @@ -14893,42 +15822,42 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ return error; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("true"); if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } tape.append(0, internal::tape_type::TRUE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("false"); if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } tape.append(0, internal::tape_type::FALSE_VALUE); return 
SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); return SUCCESS; } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { iter.log_value("null"); if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } tape.append(0, internal::tape_type::NULL_VALUE); @@ -14937,24 +15866,24 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_ // private: -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { auto start_index = next_tape_index(iter); tape.append(start_index+2, start); tape.append(start_index, end); return SUCCESS; } -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); 
iter.dom_parser.open_containers[iter.depth].count = 0; tape.skip(); // We don't actually *write* the start element until the end. } -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { // Write the ending tape element, pointing at the start location const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; tape.append(start_tape_index, end); @@ -14967,13 +15896,13 @@ simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_contain return SUCCESS; } -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { // we advance the point, accounting for the fact that we have a NULL termination tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); return current_string_buf_loc + sizeof(uint32_t); } -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); // TODO check for overflow in case someone has a crazy string (>=4GB?) 
// But only add the overflow check when the document itself exceeds 4GB @@ -15000,7 +15929,7 @@ namespace westmere { namespace { namespace stage1 { -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } return find_escaped_branchless(backslash); } @@ -15030,6 +15959,10 @@ simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::docu return stage2::tape_builder::parse_document(*this, _doc); } +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return westmere::stringparsing::parse_string(src, dst); +} + simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { auto error = stage1(_buf, _len, stage1_mode::regular); if (error) { return error; } diff --git a/simdjson/simdjson.h b/simdjson/simdjson.h index 67cb53b..f9d8081 100644 --- a/simdjson/simdjson.h +++ b/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ +/* auto-generated on 2022-11-23 10:31:42 -0500. Do not edit! */ /* begin file include/simdjson.h */ #ifndef SIMDJSON_H #define SIMDJSON_H @@ -43,14 +43,14 @@ #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 2.0.3 +#define SIMDJSON_VERSION 3.0.1 namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ - SIMDJSON_VERSION_MAJOR = 2, + SIMDJSON_VERSION_MAJOR = 3, /** * The minor version (major.MINOR.revision) of simdjson being used. */ @@ -58,7 +58,7 @@ enum { /** * The revision (major.minor.REVISION) of simdjson being used. 
*/ - SIMDJSON_VERSION_REVISION = 3 + SIMDJSON_VERSION_REVISION = 1 }; } // namespace simdjson @@ -329,12 +329,12 @@ constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; /** * The amount of padding needed in a buffer to parse JSON. * - * the input buf should be readable up to buf + SIMDJSON_PADDING + * The input buf should be readable up to buf + SIMDJSON_PADDING * this is a stopgap; there should be a better description of the * main loop and its behavior that abstracts over this * See https://github.com/simdjson/simdjson/issues/174 */ -constexpr size_t SIMDJSON_PADDING = 32; +constexpr size_t SIMDJSON_PADDING = 64; /** * By default, simdjson supports this many nested objects and arrays. @@ -460,6 +460,18 @@ constexpr size_t DEFAULT_MAX_DEPTH = 1024; #endif // MSC_VER +#if defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + #if defined(SIMDJSON_VISUAL_STUDIO) /** * Windows users need to do some extra work when building @@ -2197,11 +2209,30 @@ namespace std { /// If EXPR is an error, returns it. #define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. 
+// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). #ifndef SIMDJSON_DEVELOPMENT_CHECKS -#ifndef NDEBUG -#define SIMDJSON_DEVELOPMENT_CHECKS -#endif -#endif +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS // The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" // feature. @@ -2222,6 +2253,13 @@ namespace std { # define simdjson_fallthrough do {} while (0) /* fallthrough */ #endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + #endif // SIMDJSON_COMMON_DEFS_H /* end file include/simdjson/common_defs.h */ @@ -2249,7 +2287,7 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' @@ -2276,6 +2314,7 @@ enum error_code { INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. 
OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input NUM_ERROR_CODES }; @@ -2342,22 +2381,22 @@ struct simdjson_result_base : protected std::pair { /** * Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline simdjson_result_base() noexcept; + simdjson_inline simdjson_result_base() noexcept; /** * Create a new error result. */ - simdjson_really_inline simdjson_result_base(error_code error) noexcept; + simdjson_inline simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ - simdjson_really_inline simdjson_result_base(T &&value) noexcept; + simdjson_inline simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_really_inline simdjson_result_base(T &&value, error_code error) noexcept; + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. @@ -2365,26 +2404,19 @@ struct simdjson_result_base : protected std::pair { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_really_inline error_code get(T &value) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_really_inline const T &value(error_code &error) const & noexcept; + simdjson_inline error_code get(T &value) && noexcept; /** * The error. 
*/ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -2393,41 +2425,41 @@ struct simdjson_result_base : protected std::pair { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline operator T&&() && noexcept(false); + simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result_base @@ -2443,19 +2475,19 @@ struct simdjson_result : public internal::simdjson_result_base { /** * @private Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline simdjson_result() noexcept; + simdjson_inline simdjson_result() noexcept; /** * @private Create a new error result. 
*/ - simdjson_really_inline simdjson_result(T &&value) noexcept; + simdjson_inline simdjson_result(T &&value) noexcept; /** * @private Create a new successful result. */ - simdjson_really_inline simdjson_result(error_code error_code) noexcept; + simdjson_inline simdjson_result(error_code error_code) noexcept; /** * @private Create a new result with both things (use if you don't want to branch when creating the result). */ - simdjson_really_inline simdjson_result(T &&value, error_code error) noexcept; + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. @@ -2463,19 +2495,19 @@ struct simdjson_result : public internal::simdjson_result_base { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_warn_unused simdjson_really_inline error_code get(T &value) && noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -2484,41 +2516,41 @@ struct simdjson_result : public internal::simdjson_result_base { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). 
* * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline operator T&&() && noexcept(false); + simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; }; // struct simdjson_result @@ -2926,6 +2958,22 @@ class dom_parser_implementation { */ simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + /** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @return end of the of the written region (exclusive) or nullptr in case of error. 
+ */ + simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; + /** * Change the capacity of this parser. * @@ -2968,14 +3016,14 @@ class dom_parser_implementation { * * @return Current capacity, in bytes. */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -2987,6 +3035,7 @@ class dom_parser_implementation { */ simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + protected: /** * The maximum document length this parser supports. @@ -3003,23 +3052,23 @@ class dom_parser_implementation { size_t _max_depth{0}; // Declaring these so that subclasses can use them to implement their constructors. 
- simdjson_really_inline dom_parser_implementation() noexcept; - simdjson_really_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - simdjson_really_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - simdjson_really_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; - simdjson_really_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; }; // class dom_parser_implementation -simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } -simdjson_really_inline size_t dom_parser_implementation::max_depth() const noexcept { 
+simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -3159,7 +3208,7 @@ constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512vl_bit = 1 << 31; ///< @private bit 31 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vl_bit = 1U << 31; ///< @private bit 31 of EBX for EAX=0x7 constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 @@ -3287,15 +3336,13 @@ namespace simdjson { * @return true if the string is valid UTF-8. */ simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; - - /** * Validate the UTF-8 string. * * @param sv the string_view to validate. * @return true if the string is valid UTF-8. */ -simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { return validate_utf8(sv.data(), sv.size()); } @@ -3305,7 +3352,7 @@ simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string * @param p the string to validate. * @return true if the string is valid UTF-8. 
*/ -simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { return validate_utf8(s.data(), s.size()); } @@ -3328,7 +3375,7 @@ class implementation { * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". */ virtual const std::string &name() const { return _name; } @@ -3338,7 +3385,7 @@ class implementation { * const implementation *impl = simdjson::get_active_implementation(); * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". */ virtual const std::string &description() const { return _description; } @@ -3347,8 +3394,7 @@ class implementation { * and the current CPU match. This function may poll the current CPU/system * and should therefore not be called too often if performance is a concern. * - * - * @return true if the implementation can be safely used on the current system (determined at runtime) + * @return true if the implementation can be safely used on the current system (determined at runtime). */ bool supported_by_runtime_system() const; @@ -3357,7 +3403,7 @@ class implementation { * * The instruction sets this implementation is compiled against. * - * @return a mask of all required `internal::instruction_set::` values + * @return a mask of all required `internal::instruction_set::` values. 
*/ virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; @@ -3370,7 +3416,7 @@ class implementation { * @param capacity The largest document that will be passed to the parser. * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. * @param dst The place to put the resulting parser implementation. - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" + * @return the error code, or SUCCESS if there was no error. */ virtual error_code create_dom_parser_implementation( size_t capacity, @@ -3407,7 +3453,7 @@ class implementation { protected: /** @private Construct an implementation with the given name and description. For subclasses. */ - simdjson_really_inline implementation( + simdjson_inline implementation( std::string_view name, std::string_view description, uint32_t required_instruction_sets @@ -3445,7 +3491,7 @@ namespace internal { class available_implementation_list { public: /** Get the list of available implementations compiled into simdjson */ - simdjson_really_inline available_implementation_list() {} + simdjson_inline available_implementation_list() {} /** Number of implementations */ size_t size() const noexcept; /** STL const begin() iterator */ @@ -3572,7 +3618,7 @@ namespace internal { // template -simdjson_really_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { +simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; @@ -3580,64 +3626,64 @@ simdjson_really_inline void simdjson_result_base::tie(T &value, error_code &e } template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result_base::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template 
-simdjson_really_inline error_code simdjson_result_base::error() const noexcept { +simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template -simdjson_really_inline T& simdjson_result_base::value() & noexcept(false) { +simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template -simdjson_really_inline T&& simdjson_result_base::value() && noexcept(false) { +simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template -simdjson_really_inline T&& simdjson_result_base::take_value() && noexcept(false) { +simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template -simdjson_really_inline simdjson_result_base::operator T&&() && noexcept(false) { +simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template -simdjson_really_inline const T& simdjson_result_base::value_unsafe() const& noexcept { +simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template -simdjson_really_inline T&& simdjson_result_base::value_unsafe() && noexcept { +simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template -simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept : std::pair(std::forward(value), error) {} template -simdjson_really_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept +simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept : simdjson_result_base(T{}, 
error) {} template -simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept : simdjson_result_base(std::forward(value), SUCCESS) {} template -simdjson_really_inline simdjson_result_base::simdjson_result_base() noexcept +simdjson_inline simdjson_result_base::simdjson_result_base() noexcept : simdjson_result_base(T{}, UNINITIALIZED) {} } // namespace internal @@ -3647,65 +3693,65 @@ simdjson_really_inline simdjson_result_base::simdjson_result_base() noexcept /// template -simdjson_really_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { +simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { std::forward>(*this).tie(value, error); } template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } template -simdjson_really_inline error_code simdjson_result::error() const noexcept { +simdjson_inline error_code simdjson_result::error() const noexcept { return internal::simdjson_result_base::error(); } #if SIMDJSON_EXCEPTIONS template -simdjson_really_inline T& simdjson_result::value() & noexcept(false) { +simdjson_inline T& simdjson_result::value() & noexcept(false) { return internal::simdjson_result_base::value(); } template -simdjson_really_inline T&& simdjson_result::value() && noexcept(false) { +simdjson_inline T&& simdjson_result::value() && noexcept(false) { return std::forward>(*this).value(); } template -simdjson_really_inline T&& simdjson_result::take_value() && noexcept(false) { +simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { return std::forward>(*this).take_value(); } template -simdjson_really_inline simdjson_result::operator T&&() && noexcept(false) { +simdjson_inline simdjson_result::operator 
T&&() && noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template -simdjson_really_inline const T& simdjson_result::value_unsafe() const& noexcept { +simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { return internal::simdjson_result_base::value_unsafe(); } template -simdjson_really_inline T&& simdjson_result::value_unsafe() && noexcept { +simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { return std::forward>(*this).value_unsafe(); } template -simdjson_really_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(std::forward(value), error) {} template -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} template -simdjson_really_inline simdjson_result::simdjson_result(T &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} template -simdjson_really_inline simdjson_result::simdjson_result() noexcept +simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} } // namespace simdjson @@ -3758,7 +3804,7 @@ inline char *allocate_padded_buffer(size_t length) noexcept { } // namespace internal -inline padded_string::padded_string() noexcept {} +inline padded_string::padded_string() noexcept = default; inline padded_string::padded_string(size_t length) noexcept : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { } @@ -3840,7 +3886,13 @@ inline simdjson_result padded_string::load(std::string_view filen } // Get the file size - if(std::fseek(fp, 0, SEEK_END) < 0) { + int ret; +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + ret = 
_fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { std::fclose(fp); return IO_ERROR; } @@ -3984,25 +4036,26 @@ constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; */ class tape_ref { public: - simdjson_really_inline tape_ref() noexcept; - simdjson_really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; + simdjson_inline tape_ref() noexcept; + simdjson_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; inline size_t after_element() const noexcept; - simdjson_really_inline tape_type tape_ref_type() const noexcept; - simdjson_really_inline uint64_t tape_value() const noexcept; - simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - simdjson_really_inline bool is_false() const noexcept; - simdjson_really_inline bool is_true() const noexcept; - simdjson_really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. - simdjson_really_inline uint32_t matching_brace_index() const noexcept; - simdjson_really_inline uint32_t scope_count() const noexcept; + simdjson_inline tape_type tape_ref_type() const noexcept; + simdjson_inline uint64_t tape_value() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_false() const noexcept; + simdjson_inline bool is_true() const noexcept; + simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. 
+ simdjson_inline uint32_t matching_brace_index() const noexcept; + simdjson_inline uint32_t scope_count() const noexcept; template - simdjson_really_inline T next_tape_value() const noexcept; - simdjson_really_inline uint32_t get_string_length() const noexcept; - simdjson_really_inline const char * get_c_str() const noexcept; + simdjson_inline T next_tape_value() const noexcept; + simdjson_inline uint32_t get_string_length() const noexcept; + simdjson_inline const char * get_c_str() const noexcept; inline std::string_view get_string_view() const noexcept; - simdjson_really_inline bool is_document_root() const noexcept; + simdjson_inline bool is_document_root() const noexcept; + simdjson_inline bool usable() const noexcept; /** The document this element references. */ const dom::document *doc; @@ -4034,7 +4087,7 @@ class element; class array { public: /** Create a new, invalid array */ - simdjson_really_inline array() noexcept; + simdjson_inline array() noexcept; class iterator { public: @@ -4074,7 +4127,7 @@ class array { iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: - simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class array; }; @@ -4145,7 +4198,7 @@ class array { inline simdjson_result at(size_t index) const noexcept; private: - simdjson_really_inline array(const internal::tape_ref &tape) noexcept; + simdjson_inline array(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class element; friend struct simdjson_result; @@ -4160,9 +4213,9 @@ class array { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::array value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) 
noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; inline simdjson_result at(size_t index) const noexcept; @@ -4357,20 +4410,20 @@ class parser { * to allocate an initial capacity, call allocate() after constructing the parser. * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). */ - simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - simdjson_really_inline parser(parser &&other) noexcept; + simdjson_inline parser(parser &&other) noexcept; parser(const parser &) = delete; ///< @private Disallow copying /** * Take another parser's buffers and state. * * @param other The parser to take. Its capacity is zeroed. */ - simdjson_really_inline parser &operator=(parser &&other) noexcept; + simdjson_inline parser &operator=(parser &&other) noexcept; parser &operator=(const parser &) = delete; ///< @private Disallow copying /** Deallocate the JSON parser. 
*/ @@ -4480,17 +4533,20 @@ class parser { inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const std::string &s) & noexcept; + simdjson_inline simdjson_result parse(const std::string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const std::string &s) & noexcept; - simdjson_really_inline simdjson_result parse(const std::string &s) && =delete; + simdjson_inline simdjson_result parse(const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse(const padded_string &s) && =delete; /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const padded_string &s) & noexcept; - simdjson_really_inline simdjson_result parse(const padded_string &s) && =delete; + simdjson_inline simdjson_result parse(const padded_string_view &v) & noexcept; + simdjson_inline simdjson_result parse(const padded_string_view &v) && =delete; /** @private We do not want to allow implicit conversion from C string to std::string. 
*/ - simdjson_really_inline simdjson_result parse(const char *buf) noexcept = delete; + simdjson_inline simdjson_result parse(const char *buf) noexcept = delete; /** * Parse a JSON document into a provide document instance and return a temporary reference to it. @@ -4539,17 +4595,17 @@ class parser { inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; - simdjson_really_inline 
simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; /** * Load a file containing many JSON documents. @@ -4761,7 +4817,7 @@ class parser { * * @return Current capacity, in bytes. */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** * The largest document this parser can automatically support. @@ -4770,14 +4826,14 @@ class parser { * * @return Maximum capacity, in bytes. */ - simdjson_really_inline size_t max_capacity() const noexcept; + simdjson_inline size_t max_capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. @@ -4793,7 +4849,7 @@ class parser { * * @param max_capacity The new maximum capacity, in bytes. */ - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; #ifdef SIMDJSON_THREADS_ENABLED /** @@ -4975,13 +5031,13 @@ class document_stream { * error = parser.parse_many(json).get(docs); * ``` */ - simdjson_really_inline document_stream() noexcept; + simdjson_inline document_stream() noexcept; /** Move one document_stream to another. 
*/ - simdjson_really_inline document_stream(document_stream &&other) noexcept = default; + simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ - simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; - simdjson_really_inline ~document_stream() noexcept; + simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. */ @@ -5020,11 +5076,11 @@ class document_stream { /** * Default constructor. */ - simdjson_really_inline iterator() noexcept; + simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ - simdjson_really_inline reference operator*() noexcept; + simdjson_inline reference operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -5033,7 +5089,7 @@ class document_stream { * Check if we're at the end yet. * @param other the end iterator to compare to. */ - simdjson_really_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * @@ -5049,7 +5105,7 @@ class document_stream { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ - simdjson_really_inline size_t current_index() const noexcept; + simdjson_inline size_t current_index() const noexcept; /** * @private * @@ -5069,10 +5125,10 @@ class document_stream { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. 
*/ - simdjson_really_inline std::string_view source() const noexcept; + simdjson_inline std::string_view source() const noexcept; private: - simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; + simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ @@ -5083,11 +5139,11 @@ class document_stream { /** * Start iterating the documents in the stream. */ - simdjson_really_inline iterator begin() noexcept; + simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. */ - simdjson_really_inline iterator end() noexcept; + simdjson_inline iterator end() noexcept; private: @@ -5103,7 +5159,7 @@ class document_stream { * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ - simdjson_really_inline document_stream( + simdjson_inline document_stream( dom::parser &parser, const uint8_t *buf, size_t len, @@ -5197,19 +5253,19 @@ class document_stream { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private #if SIMDJSON_EXCEPTIONS - simdjson_really_inline dom::document_stream::iterator begin() noexcept(false); - simdjson_really_inline dom::document_stream::iterator end() noexcept(false); + simdjson_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_inline 
dom::document_stream::iterator end() noexcept(false); #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator begin() noexcept; + simdjson_inline dom::document_stream::iterator begin() noexcept; [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator end() noexcept; + simdjson_inline dom::document_stream::iterator end() noexcept; #endif // SIMDJSON_DISABLE_DEPRECATED_API #endif // SIMDJSON_EXCEPTIONS }; // struct simdjson_result @@ -5258,10 +5314,10 @@ enum class element_type { class element { public: /** Create a new, invalid element. */ - simdjson_really_inline element() noexcept; + simdjson_inline element() noexcept; /** The type of this element. */ - simdjson_really_inline element_type type() const noexcept; + simdjson_inline element_type type() const noexcept; /** * Cast this element to an array. @@ -5413,7 +5469,7 @@ class element { * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object */ template - simdjson_really_inline bool is() const noexcept; + simdjson_inline bool is() const noexcept; /** * Get the value as the provided type (T). @@ -5458,7 +5514,7 @@ class element { * @returns The error that occurred, or SUCCESS if there was no error. */ template - simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; /** * Get the value as the provided type (T), setting error if it's not the given type. 
@@ -5682,7 +5738,7 @@ class element { inline bool dump_raw_tape(std::ostream &out) const noexcept; private: - simdjson_really_inline element(const internal::tape_ref &tape) noexcept; + simdjson_inline element(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; friend class document; friend class object; @@ -5699,59 +5755,59 @@ class element { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::element &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::element &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result type() const noexcept; + simdjson_inline simdjson_result type() const noexcept; template - simdjson_really_inline bool is() const noexcept; + simdjson_inline bool is() const noexcept; template - simdjson_really_inline simdjson_result get() const noexcept; + simdjson_inline simdjson_result get() const noexcept; template - simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; - - simdjson_really_inline simdjson_result get_array() const noexcept; - simdjson_really_inline simdjson_result get_object() const noexcept; - simdjson_really_inline simdjson_result get_c_str() const noexcept; - simdjson_really_inline simdjson_result get_string_length() const noexcept; - simdjson_really_inline simdjson_result get_string() const noexcept; - simdjson_really_inline simdjson_result get_int64() const noexcept; - simdjson_really_inline simdjson_result get_uint64() const noexcept; - simdjson_really_inline simdjson_result get_double() const noexcept; - simdjson_really_inline simdjson_result get_bool() const noexcept; - - 
simdjson_really_inline bool is_array() const noexcept; - simdjson_really_inline bool is_object() const noexcept; - simdjson_really_inline bool is_string() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_number() const noexcept; - simdjson_really_inline bool is_bool() const noexcept; - simdjson_really_inline bool is_null() const noexcept; - - simdjson_really_inline simdjson_result operator[](std::string_view key) const noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) const noexcept; - simdjson_really_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + simdjson_inline simdjson_result get_array() const noexcept; + simdjson_inline simdjson_result get_object() const noexcept; + simdjson_inline simdjson_result get_c_str() const noexcept; + simdjson_inline simdjson_result get_string_length() const noexcept; + simdjson_inline simdjson_result get_string() const noexcept; + simdjson_inline simdjson_result get_int64() const noexcept; + simdjson_inline simdjson_result get_uint64() const noexcept; + simdjson_inline simdjson_result get_double() const noexcept; + simdjson_inline simdjson_result get_bool() const noexcept; + + simdjson_inline bool is_array() const noexcept; + simdjson_inline bool is_object() const noexcept; + simdjson_inline bool is_string() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_number() const noexcept; + simdjson_inline bool is_bool() const noexcept; + simdjson_inline bool is_null() const noexcept; + + simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; + 
simdjson_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] - simdjson_really_inline simdjson_result at(const std::string_view json_pointer) const noexcept; - simdjson_really_inline simdjson_result at(size_t index) const noexcept; - simdjson_really_inline simdjson_result at_key(std::string_view key) const noexcept; - simdjson_really_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at(size_t index) const noexcept; + simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator bool() const noexcept(false); - simdjson_really_inline explicit operator const char*() const noexcept(false); - simdjson_really_inline operator std::string_view() const noexcept(false); - simdjson_really_inline operator uint64_t() const noexcept(false); - simdjson_really_inline operator int64_t() const noexcept(false); - simdjson_really_inline operator double() const noexcept(false); - simdjson_really_inline operator dom::array() const noexcept(false); - simdjson_really_inline operator dom::object() const noexcept(false); - - simdjson_really_inline dom::array::iterator begin() const noexcept(false); - simdjson_really_inline dom::array::iterator end() const noexcept(false); + simdjson_inline operator bool() const noexcept(false); + simdjson_inline explicit operator const char*() const noexcept(false); + simdjson_inline operator std::string_view() const noexcept(false); + simdjson_inline operator uint64_t() const noexcept(false); + 
simdjson_inline operator int64_t() const noexcept(false); + simdjson_inline operator double() const noexcept(false); + simdjson_inline operator dom::array() const noexcept(false); + simdjson_inline operator dom::object() const noexcept(false); + + simdjson_inline dom::array::iterator begin() const noexcept(false); + simdjson_inline dom::array::iterator end() const noexcept(false); #endif // SIMDJSON_EXCEPTIONS }; @@ -5782,7 +5838,7 @@ class key_value_pair; class object { public: /** Create a new, invalid object */ - simdjson_really_inline object() noexcept; + simdjson_inline object() noexcept; class iterator { public: @@ -5852,7 +5908,7 @@ class object { iterator(const iterator&) noexcept = default; iterator& operator=(const iterator&) noexcept = default; private: - simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; @@ -5966,7 +6022,7 @@ class object { inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; private: - simdjson_really_inline object(const internal::tape_ref &tape) noexcept; + simdjson_inline object(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; @@ -5987,7 +6043,7 @@ class key_value_pair { element value; private: - simdjson_really_inline key_value_pair(std::string_view _key, element _value) noexcept; + simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; friend class object; }; @@ -5997,9 +6053,9 @@ class key_value_pair { template<> struct simdjson_result : public internal::simdjson_result_base { public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::object value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::object value) noexcept; ///< 
@private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private inline simdjson_result operator[](std::string_view key) const noexcept; inline simdjson_result operator[](const char *key) const noexcept; @@ -6069,7 +6125,7 @@ class string_builder { /** Append an object to the builder (to be printed) **/ inline void append(simdjson::dom::object value); /** Reset the builder (so that it would print the empty string) **/ - simdjson_really_inline void clear(); + simdjson_inline void clear(); /** * Get access to the string. The string_view is owned by the builder * and it is invalid to use it after the string_builder has been @@ -6077,9 +6133,9 @@ class string_builder { * However you can make a copy of the string_view on memory that you * own. */ - simdjson_really_inline std::string_view str() const; + simdjson_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ - simdjson_really_inline void append(simdjson::dom::key_value_pair value); + simdjson_inline void append(simdjson::dom::key_value_pair value); private: formatter format{}; }; @@ -6093,43 +6149,43 @@ class mini_formatter { public: mini_formatter() = default; /** Add a comma **/ - simdjson_really_inline void comma(); + simdjson_inline void comma(); /** Start an array, prints [ **/ - simdjson_really_inline void start_array(); + simdjson_inline void start_array(); /** End an array, prints ] **/ - simdjson_really_inline void end_array(); + simdjson_inline void end_array(); /** Start an array, prints { **/ - simdjson_really_inline void start_object(); + simdjson_inline void start_object(); /** Start an array, prints } **/ - simdjson_really_inline void end_object(); + simdjson_inline void end_object(); /** Prints a true **/ - simdjson_really_inline void true_atom(); + simdjson_inline void true_atom(); /** Prints a false **/ - simdjson_really_inline void false_atom(); + simdjson_inline void false_atom(); /** Prints a null **/ - simdjson_really_inline 
void null_atom(); + simdjson_inline void null_atom(); /** Prints a number **/ - simdjson_really_inline void number(int64_t x); + simdjson_inline void number(int64_t x); /** Prints a number **/ - simdjson_really_inline void number(uint64_t x); + simdjson_inline void number(uint64_t x); /** Prints a number **/ - simdjson_really_inline void number(double x); + simdjson_inline void number(double x); /** Prints a key (string + colon) **/ - simdjson_really_inline void key(std::string_view unescaped); + simdjson_inline void key(std::string_view unescaped); /** Prints a string. The string is escaped as needed. **/ - simdjson_really_inline void string(std::string_view unescaped); + simdjson_inline void string(std::string_view unescaped); /** Clears out the content. **/ - simdjson_really_inline void clear(); + simdjson_inline void clear(); /** * Get access to the buffer, it is owned by the instance, but * the user can make a copy. **/ - simdjson_really_inline std::string_view str() const; + simdjson_inline std::string_view str() const; private: // implementation details (subject to change) /** Prints one character **/ - simdjson_really_inline void one_char(char c); + simdjson_inline void one_char(char c); /** Backing buffer **/ std::vector buffer{}; // not ideal! 
}; @@ -6758,11 +6814,11 @@ namespace simdjson { // // simdjson_result inline implementation // -simdjson_really_inline simdjson_result::simdjson_result() noexcept +simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::array value) noexcept +simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} #if SIMDJSON_EXCEPTIONS @@ -6796,21 +6852,26 @@ namespace dom { // // array inline implementation // -simdjson_really_inline array::array() noexcept : tape{} {} -simdjson_really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +simdjson_inline array::array() noexcept : tape{} {} +simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} inline array::iterator array::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline array::iterator array::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t array::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } inline size_t array::number_of_slots() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.matching_brace_index() - tape.json_index; } inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { + 
SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } else if(json_pointer[0] != '/') { // otherwise there is an error @@ -6851,6 +6912,7 @@ inline simdjson_result array::at_pointer(std::string_view json_pointer) } inline simdjson_result array::at(size_t index) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 size_t i=0; for (auto element : *this) { if (i == index) { return element; } @@ -6862,7 +6924,7 @@ inline simdjson_result array::at(size_t index) const noexcept { // // array::iterator inline implementation // -simdjson_really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline element array::iterator::operator*() const noexcept { return element(tape); } @@ -6911,11 +6973,11 @@ namespace simdjson { // // simdjson_result inline implementation // -simdjson_really_inline simdjson_result::simdjson_result() noexcept +simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::element &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(dom::element &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result simdjson_result::type() const noexcept { if (error()) { return error(); } @@ -6923,101 +6985,101 @@ inline simdjson_result simdjson_result::type() } template -simdjson_really_inline bool simdjson_result::is() const 
noexcept { +simdjson_inline bool simdjson_result::is() const noexcept { return !error() && first.is(); } template -simdjson_really_inline simdjson_result simdjson_result::get() const noexcept { +simdjson_inline simdjson_result simdjson_result::get() const noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) const noexcept { +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) const noexcept { if (error()) { return error(); } return first.get(value); } -simdjson_really_inline simdjson_result simdjson_result::get_array() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_really_inline simdjson_result simdjson_result::get_object() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_c_str() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_c_str() const noexcept { if (error()) { return error(); } return first.get_c_str(); } -simdjson_really_inline simdjson_result simdjson_result::get_string_length() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { if (error()) { return error(); } return first.get_string_length(); } -simdjson_really_inline simdjson_result simdjson_result::get_string() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() const noexcept { if (error()) { return error(); } return first.get_int64(); } 
-simdjson_really_inline simdjson_result simdjson_result::get_uint64() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() const noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_really_inline bool simdjson_result::is_array() const noexcept { +simdjson_inline bool simdjson_result::is_array() const noexcept { return !error() && first.is_array(); } -simdjson_really_inline bool simdjson_result::is_object() const noexcept { +simdjson_inline bool simdjson_result::is_object() const noexcept { return !error() && first.is_object(); } -simdjson_really_inline bool simdjson_result::is_string() const noexcept { +simdjson_inline bool simdjson_result::is_string() const noexcept { return !error() && first.is_string(); } -simdjson_really_inline bool simdjson_result::is_int64() const noexcept { +simdjson_inline bool simdjson_result::is_int64() const noexcept { return !error() && first.is_int64(); } -simdjson_really_inline bool simdjson_result::is_uint64() const noexcept { +simdjson_inline bool simdjson_result::is_uint64() const noexcept { return !error() && first.is_uint64(); } -simdjson_really_inline bool simdjson_result::is_double() const noexcept { +simdjson_inline bool simdjson_result::is_double() const noexcept { return !error() && first.is_double(); } -simdjson_really_inline bool simdjson_result::is_number() const noexcept { +simdjson_inline bool simdjson_result::is_number() const noexcept { return !error() && first.is_number(); } -simdjson_really_inline bool 
simdjson_result::is_bool() const noexcept { +simdjson_inline bool simdjson_result::is_bool() const noexcept { return !error() && first.is_bool(); } -simdjson_really_inline bool simdjson_result::is_null() const noexcept { +simdjson_inline bool simdjson_result::is_null() const noexcept { return !error() && first.is_null(); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] -simdjson_really_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { +simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING if (error()) { return error(); } @@ -7025,51 +7087,51 @@ SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_POP_DISABLE_WARNINGS } #endif // SIMDJSON_DISABLE_DEPRECATED_API -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) const noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } 
-simdjson_really_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { +simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } -simdjson_really_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { +simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator bool() const noexcept(false) { +simdjson_inline simdjson_result::operator bool() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator const char *() const noexcept(false) { +simdjson_inline simdjson_result::operator const char *() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator std::string_view() const noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator uint64_t() const noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator int64_t() const noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator double() const noexcept(false) { +simdjson_inline simdjson_result::operator double() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator dom::array() const noexcept(false) { +simdjson_inline simdjson_result::operator dom::array() const noexcept(false) { return get(); } -simdjson_really_inline simdjson_result::operator dom::object() const noexcept(false) { +simdjson_inline simdjson_result::operator 
dom::object() const noexcept(false) { return get(); } -simdjson_really_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { +simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.begin(); } -simdjson_really_inline dom::array::iterator simdjson_result::end() const noexcept(false) { +simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } @@ -7081,15 +7143,17 @@ namespace dom { // // element inline implementation // -simdjson_really_inline element::element() noexcept : tape{} {} -simdjson_really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +simdjson_inline element::element() noexcept : tape{} {} +simdjson_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline element_type element::type() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 auto tape_type = tape.tape_ref_type(); return tape_type == internal::tape_type::FALSE_VALUE ? 
element_type::BOOL : static_cast(tape_type); } inline simdjson_result element::get_bool() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(tape.is_true()) { return true; } else if(tape.is_false()) { @@ -7098,6 +7162,7 @@ inline simdjson_result element::get_bool() const noexcept { return INCORRECT_TYPE; } inline simdjson_result element::get_c_str() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_c_str(); @@ -7107,6 +7172,7 @@ inline simdjson_result element::get_c_str() const noexcept { } } inline simdjson_result element::get_string_length() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: { return tape.get_string_length(); @@ -7116,6 +7182,7 @@ inline simdjson_result element::get_string_length() const noexcept { } } inline simdjson_result element::get_string() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::STRING: return tape.get_string_view(); @@ -7124,6 +7191,7 @@ inline simdjson_result element::get_string() const noexcept { } } inline simdjson_result element::get_uint64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken if(tape.is_int64()) { int64_t result = tape.next_tape_value(); @@ -7137,6 +7205,7 @@ inline simdjson_result element::get_uint64() const noexcept { return tape.next_tape_value(); } inline simdjson_result element::get_int64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 
if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken if(tape.is_uint64()) { uint64_t result = tape.next_tape_value(); @@ -7151,6 +7220,7 @@ inline simdjson_result element::get_int64() const noexcept { return tape.next_tape_value(); } inline simdjson_result element::get_double() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 // Performance considerations: // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight // comparison. @@ -7172,6 +7242,7 @@ inline simdjson_result element::get_double() const noexcept { return tape.next_tape_value(); } inline simdjson_result element::get_array() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_ARRAY: return array(tape); @@ -7180,6 +7251,7 @@ inline simdjson_result element::get_array() const noexcept { } } inline simdjson_result element::get_object() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: return object(tape); @@ -7189,12 +7261,12 @@ inline simdjson_result element::get_object() const noexcept { } template -simdjson_warn_unused simdjson_really_inline error_code element::get(T &value) const noexcept { +simdjson_warn_unused simdjson_inline error_code element::get(T &value) const noexcept { return get().get(value); } // An element-specific version prevents recursion with simdjson_result::get(value) template<> -simdjson_warn_unused simdjson_really_inline error_code element::get(element &value) const noexcept { +simdjson_warn_unused simdjson_inline error_code element::get(element &value) const noexcept { value = element(tape); return SUCCESS; } @@ -7204,7 +7276,7 @@ inline void element::tie(T &value, error_code &error) && noexcept { } template 
-simdjson_really_inline bool element::is() const noexcept { +simdjson_inline bool element::is() const noexcept { auto result = get(); return !result.error(); } @@ -7259,6 +7331,7 @@ inline simdjson_result element::operator[](const char *key) const noexc } inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 switch (tape.tape_ref_type()) { case internal::tape_type::START_OBJECT: return object(tape).at_pointer(json_pointer); @@ -7294,6 +7367,7 @@ inline simdjson_result element::at_key_case_insensitive(std::string_vie } inline bool element::dump_raw_tape(std::ostream &out) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.doc->dump_raw_tape(out); } @@ -7422,7 +7496,7 @@ inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_ } #endif -simdjson_really_inline document_stream::document_stream( +simdjson_inline document_stream::document_stream( dom::parser &_parser, const uint8_t *_buf, size_t _len, @@ -7444,7 +7518,7 @@ simdjson_really_inline document_stream::document_stream( #endif } -simdjson_really_inline document_stream::document_stream() noexcept +simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, @@ -7456,31 +7530,31 @@ simdjson_really_inline document_stream::document_stream() noexcept { } -simdjson_really_inline document_stream::~document_stream() noexcept { +simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); #endif } -simdjson_really_inline document_stream::iterator::iterator() noexcept +simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } -simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { +simdjson_inline document_stream::iterator 
document_stream::begin() noexcept { start(); // If there are no documents, we're finished. return iterator(this, error == EMPTY); } -simdjson_really_inline document_stream::iterator document_stream::end() noexcept { +simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } -simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } -simdjson_really_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { +simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { // Note that in case of error, we do not yet mark // the iterator as "finished": this detection is done // in the operator++ function since it is possible @@ -7490,7 +7564,7 @@ simdjson_really_inline document_stream::iterator::reference document_stream::ite return stream->parser->doc.root(); } -simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. 
(E.g., we do not // keep generating documents with errors, or go beyond @@ -7517,7 +7591,7 @@ simdjson_really_inline document_stream::iterator& document_stream::iterator::ope return *this; } -simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } @@ -7548,11 +7622,11 @@ inline void document_stream::start() noexcept { next(); } -simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } -simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { const char* start = reinterpret_cast(stream->buf) + current_index(); bool object_or_array = ((*start == '[') || (*start == '{')); if(object_or_array) { @@ -7645,32 +7719,32 @@ inline void document_stream::start_stage1_thread() noexcept { } // namespace dom -simdjson_really_inline simdjson_result::simdjson_result() noexcept +simdjson_inline simdjson_result::simdjson_result() noexcept : simdjson_result_base() { } -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : simdjson_result_base(error) { } -simdjson_really_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept : simdjson_result_base(std::forward(value)) { } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { if 
(error()) { throw simdjson_error(error()); } return first.begin(); } -simdjson_really_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first.end(); } #else // SIMDJSON_EXCEPTIONS #ifndef SIMDJSON_DISABLE_DEPRECATED_API -simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept { +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { first.error = error(); return first.begin(); } -simdjson_really_inline dom::document_stream::iterator simdjson_result::end() noexcept { +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { first.error = error(); return first.end(); } @@ -7848,11 +7922,11 @@ namespace simdjson { // // simdjson_result inline implementation // -simdjson_really_inline simdjson_result::simdjson_result() noexcept +simdjson_inline simdjson_result::simdjson_result() noexcept : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::object value) noexcept +simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { @@ -7898,15 +7972,18 @@ namespace dom { // // object inline implementation // -simdjson_really_inline object::object() noexcept : tape{} {} -simdjson_really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +simdjson_inline object::object() noexcept : tape{} {} +simdjson_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline 
object::iterator object::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.json_index + 1); } inline object::iterator object::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return internal::tape_ref(tape.doc, tape.after_element() - 1); } inline size_t object::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 return tape.scope_count(); } @@ -7917,6 +7994,7 @@ inline simdjson_result object::operator[](const char *key) const noexce return at_key(key); } inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 if(json_pointer.empty()) { // an empty string means that we return the current node return element(this->tape); // copy the current node } else if(json_pointer[0] != '/') { // otherwise there is an error @@ -7985,7 +8063,7 @@ inline simdjson_result object::at_key_case_insensitive(std::string_view // // object::iterator inline implementation // -simdjson_really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +simdjson_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } inline const key_value_pair object::iterator::operator*() const noexcept { return key_value_pair(key(), value()); } @@ -8589,12 +8667,12 @@ namespace dom { // // parser inline implementation // -simdjson_really_inline parser::parser(size_t max_capacity) noexcept +simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity}, loaded_bytes(nullptr) { } -simdjson_really_inline parser::parser(parser &&other) noexcept = default; -simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = 
default; +simdjson_inline parser::parser(parser &&other) noexcept = default; +simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; inline bool parser::is_valid() const noexcept { return valid; } inline int parser::get_error_code() const noexcept { return error; } @@ -8616,7 +8694,13 @@ inline simdjson_result parser::read_file(const std::string &path) noexce } // Get the file size - if(std::fseek(fp, 0, SEEK_END) < 0) { + int ret; +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { std::fclose(fp); return IO_ERROR; } @@ -8692,13 +8776,13 @@ inline simdjson_result parser::parse_into_document(document& provided_d return provided_doc.root(); } -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); } -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { return parse_into_document(provided_doc, s.data(), s.length(), false); } @@ -8707,15 +8791,18 @@ inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bo return 
parse_into_document(doc, buf, len, realloc_if_needed); } -simdjson_really_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { +simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { return parse(reinterpret_cast(buf), len, realloc_if_needed); } -simdjson_really_inline simdjson_result parser::parse(const std::string &s) & noexcept { +simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); } -simdjson_really_inline simdjson_result parser::parse(const padded_string &s) & noexcept { +simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { return parse(s.data(), s.length(), false); } +simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { + return parse(v.data(), v.length(), false); +} inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } @@ -8731,13 +8818,13 @@ inline simdjson_result parser::parse_many(const padded_string & return parse_many(s.data(), s.length(), batch_size); } -simdjson_really_inline size_t parser::capacity() const noexcept { +simdjson_inline size_t parser::capacity() const noexcept { return implementation ? implementation->capacity() : 0; } -simdjson_really_inline size_t parser::max_capacity() const noexcept { +simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_really_inline size_t parser::max_depth() const noexcept { +simdjson_inline size_t parser::max_depth() const noexcept { return implementation ? 
implementation->max_depth() : DEFAULT_MAX_DEPTH; } @@ -8790,7 +8877,7 @@ inline error_code parser::ensure_capacity(document& target_document, size_t desi return SUCCESS; } -simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { @@ -8815,39 +8902,41 @@ namespace internal { // // tape_ref inline implementation // -simdjson_really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} -simdjson_really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} +simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} -simdjson_really_inline bool tape_ref::is_document_root() const noexcept { +simdjson_inline bool tape_ref::is_document_root() const noexcept { return json_index == 1; // should we ever change the structure of the tape, this should get updated. } - +simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). +} // Some value types have a specific on-tape word value. It can be faster // to check the type by doing a word-to-word comparison instead of extracting the // most significant 8 bits. 
-simdjson_really_inline bool tape_ref::is_double() const noexcept { +simdjson_inline bool tape_ref::is_double() const noexcept { constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; return doc->tape[json_index] == tape_double; } -simdjson_really_inline bool tape_ref::is_int64() const noexcept { +simdjson_inline bool tape_ref::is_int64() const noexcept { constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; return doc->tape[json_index] == tape_int64; } -simdjson_really_inline bool tape_ref::is_uint64() const noexcept { +simdjson_inline bool tape_ref::is_uint64() const noexcept { constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; return doc->tape[json_index] == tape_uint64; } -simdjson_really_inline bool tape_ref::is_false() const noexcept { +simdjson_inline bool tape_ref::is_false() const noexcept { constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; return doc->tape[json_index] == tape_false; } -simdjson_really_inline bool tape_ref::is_true() const noexcept { +simdjson_inline bool tape_ref::is_true() const noexcept { constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; return doc->tape[json_index] == tape_true; } -simdjson_really_inline bool tape_ref::is_null_on_tape() const noexcept { +simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; return doc->tape[json_index] == tape_null; } @@ -8865,21 +8954,21 @@ inline size_t tape_ref::after_element() const noexcept { return json_index + 1; } } -simdjson_really_inline tape_type tape_ref::tape_ref_type() const noexcept { +simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { return static_cast(doc->tape[json_index] >> 56); } -simdjson_really_inline uint64_t internal::tape_ref::tape_value() const noexcept { +simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { return doc->tape[json_index] & internal::JSON_VALUE_MASK; } 
-simdjson_really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { +simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { return uint32_t(doc->tape[json_index]); } -simdjson_really_inline uint32_t internal::tape_ref::scope_count() const noexcept { +simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); } template -simdjson_really_inline T tape_ref::next_tape_value() const noexcept { +simdjson_inline T tape_ref::next_tape_value() const noexcept { static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); // Though the following is tempting... // return *reinterpret_cast(&doc->tape[json_index + 1]); @@ -8890,14 +8979,14 @@ simdjson_really_inline T tape_ref::next_tape_value() const noexcept { return x; } -simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { +simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { size_t string_buf_index = size_t(tape_value()); uint32_t len; std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); return len; } -simdjson_really_inline const char * internal::tape_ref::get_c_str() const noexcept { +simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { size_t string_buf_index = size_t(tape_value()); return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); } @@ -9023,19 +9112,19 @@ namespace internal { * Minifier/formatter code. 
**/ -simdjson_really_inline void mini_formatter::number(uint64_t x) { +simdjson_inline void mini_formatter::number(uint64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } -simdjson_really_inline void mini_formatter::number(int64_t x) { +simdjson_inline void mini_formatter::number(int64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); buffer.insert(buffer.end(), number_buffer, newp); } -simdjson_really_inline void mini_formatter::number(double x) { +simdjson_inline void mini_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is // safe because our implementation does not check the second @@ -9044,31 +9133,31 @@ simdjson_really_inline void mini_formatter::number(double x) { buffer.insert(buffer.end(), number_buffer, newp); } -simdjson_really_inline void mini_formatter::start_array() { one_char('['); } -simdjson_really_inline void mini_formatter::end_array() { one_char(']'); } -simdjson_really_inline void mini_formatter::start_object() { one_char('{'); } -simdjson_really_inline void mini_formatter::end_object() { one_char('}'); } -simdjson_really_inline void mini_formatter::comma() { one_char(','); } +simdjson_inline void mini_formatter::start_array() { one_char('['); } +simdjson_inline void mini_formatter::end_array() { one_char(']'); } +simdjson_inline void mini_formatter::start_object() { one_char('{'); } +simdjson_inline void mini_formatter::end_object() { one_char('}'); } +simdjson_inline void mini_formatter::comma() { one_char(','); } -simdjson_really_inline void mini_formatter::true_atom() { +simdjson_inline void mini_formatter::true_atom() { const char * s = "true"; buffer.insert(buffer.end(), s, s + 4); } -simdjson_really_inline void mini_formatter::false_atom() { +simdjson_inline void mini_formatter::false_atom() { const char * s = "false"; buffer.insert(buffer.end(), s, s + 5); } 
-simdjson_really_inline void mini_formatter::null_atom() { +simdjson_inline void mini_formatter::null_atom() { const char * s = "null"; buffer.insert(buffer.end(), s, s + 4); } -simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } -simdjson_really_inline void mini_formatter::key(std::string_view unescaped) { +simdjson_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } +simdjson_inline void mini_formatter::key(std::string_view unescaped) { string(unescaped); one_char(':'); } -simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { +simdjson_inline void mini_formatter::string(std::string_view unescaped) { one_char('\"'); size_t i = 0; // Fast path for the case where we have no control character, no ", and no backslash. @@ -9151,7 +9240,7 @@ inline void mini_formatter::clear() { buffer.clear(); } -simdjson_really_inline std::string_view mini_formatter::str() const { +simdjson_inline std::string_view mini_formatter::str() const { return std::string_view(buffer.data(), buffer.size()); } @@ -9322,18 +9411,18 @@ inline void string_builder::append(simdjson::dom::array value) { } template -simdjson_really_inline void string_builder::append(simdjson::dom::key_value_pair kv) { +simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); } template -simdjson_really_inline void string_builder::clear() { +simdjson_inline void string_builder::clear() { format.clear(); } template -simdjson_really_inline std::string_view string_builder::str() const { +simdjson_inline std::string_view string_builder::str() const { return format.str(); } @@ -9592,7 +9681,7 @@ using namespace simdjson::dom; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_inline implementation() : 
simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, @@ -9650,10 +9739,11 @@ class dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -9702,6 +9792,8 @@ inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth( // you use visual studio or other compilers. #include +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + #endif // SIMDJSON_ARM64_INTRINSICS_H /* end file include/simdjson/arm64/intrinsics.h */ /* begin file include/simdjson/arm64/bitmanipulation.h */ @@ -9716,7 +9808,7 @@ namespace { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. 
SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) @@ -9729,12 +9821,12 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -9749,7 +9841,7 @@ simdjson_really_inline int leading_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline int count_ones(uint64_t input_num) { +simdjson_inline int count_ones(uint64_t input_num) { return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); } @@ -9768,7 +9860,7 @@ simdjson_really_inline int count_ones(uint64_t input_num) { #define SIMDJSON_PREFER_REVERSE_BITS 1 /* reverse the bits */ -simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { uint64_t rev_bits; __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); return rev_bits; @@ -9781,13 +9873,13 @@ simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { * of such undefined behavior is never used. 
**/ SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); } #endif -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; return *result < value1; @@ -9816,7 +9908,7 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { ///////////// // We could do this with PMULL, but it is apparently slow. // @@ -9873,7 +9965,7 @@ namespace { * You should not use this function except for compile-time constants: * it is not efficient. */ -simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { @@ -9902,7 +9994,7 @@ simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint return x; } -simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { uint8x8_t x{}; x = vset_lane_u8(x1, x, 0); @@ -9917,7 +10009,7 @@ simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_ } // We have to do the same work for make_int8x16_t -simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, +simdjson_inline int8x16_t 
make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x13, int8_t x14, int8_t x15, int8_t x16) { @@ -9963,24 +10055,24 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x static const int SIZE = sizeof(value); // Conversion from/to SIMD register - simdjson_really_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdjson_really_inline operator const uint8x16_t&() const { return this->value; } - simdjson_really_inline operator uint8x16_t&() { return this->value; } + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } // Bit operations - simdjson_really_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } - simdjson_really_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } - simdjson_really_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } - simdjson_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdjson_really_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline 
simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_u8(prev_chunk, *this, 16 - N); } }; @@ -9991,17 +10083,17 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; - static simdjson_really_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // False constructor - simdjson_really_inline simd8() : simd8(vdupq_n_u8(0)) {} + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : simd8(splat(_value)) {} + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} // We return uint32_t instead of uint16_t because that seems to be more efficient for most // purposes (cutting it 
down to uint16_t costs performance in some compilers). - simdjson_really_inline uint32_t to_bitmask() const { + simdjson_inline uint32_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); @@ -10015,26 +10107,26 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } - simdjson_really_inline bool any() const { return vmaxvq_u8(*this) != 0; } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } }; // Unsigned bytes template<> struct simd8: base_u8 { - static simdjson_really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } - static simdjson_really_inline uint8x16_t zero() { return vdupq_n_u8(0); } - static simdjson_really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } - simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} + simdjson_inline simd8() : simd8(zero()) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, 
uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(make_uint8x16_t( @@ -10042,7 +10134,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(uint8x16_t{ @@ -10052,7 +10144,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x #endif // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -10063,44 +10155,44 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } // Store to array - simdjson_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } - 
simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-specific operations - simdjson_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdjson_really_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + 
simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } // Bit-specific operations - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template - simdjson_really_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } template - simdjson_really_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return 
lookup_table.apply_lookup_16_to(*this); } @@ -10113,7 +10205,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x // signature simd8 compress(uint16_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_really_inline void compress(uint16_t mask, L * output) const { + simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -10150,7 +10242,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a // bitset) to output1, then those corresponding to a 0 in the high half to output2. template - simdjson_really_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { using internal::thintable_epi8; uint8_t mask1 = uint8_t(mask); // least significant 8 bits uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits @@ -10169,7 +10261,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -10183,7 +10275,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_u8(*this, simd8(original)); } }; @@ -10193,24 +10285,24 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x struct simd8 { int8x16_t value; - static simdjson_really_inline simd8 splat(int8_t _value) { return 
vmovq_n_s8(_value); } - static simdjson_really_inline simd8 zero() { return vdupq_n_s8(0); } - static simdjson_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } // Conversion from/to SIMD register - simdjson_really_inline simd8(const int8x16_t _value) : value{_value} {} - simdjson_really_inline operator const int8x16_t&() const { return this->value; } - simdjson_really_inline operator int8x16_t&() { return this->value; } + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} + simdjson_inline simd8() : simd8(zero()) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(make_int8x16_t( @@ -10218,7 +10310,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x v8, v9, v10,v11,v12,v13,v14,v15 )) {} #else - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, 
int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(int8x16_t{ @@ -10227,7 +10319,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x }) {} #endif // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { @@ -10238,7 +10330,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } // Store to array - simdjson_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } // Explicit conversion to/from unsigned // @@ -10246,35 +10338,35 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 // and relatively ugly and hard to read. 
#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} #endif - simdjson_really_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } // Math - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } - simdjson_really_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 
other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return vextq_s8(prev_chunk, *this, 16 - N); } // Perform a lookup assuming no value is larger than 16 template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return lookup_table.apply_lookup_16_to(*this); } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -10288,7 +10380,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x } template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { return vqtbl1q_s8(*this, simd8(original)); } }; @@ -10303,22 +10395,22 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), 
simd8::load(ptr+32), simd8::load(ptr+48)} {} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); // compute the prefix sum of the popcounts of each byte uint64_t offsets = popcounts * 0x0101010101010101; @@ -10329,7 +10421,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x return offsets >> 56; } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO const uint8x16_t bit_mask = make_uint8x16_t( 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, @@ -10349,7 +10441,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, @@ -10359,7 +10451,7 @@ simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, @@ -10386,11 +10478,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise 
-simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -10421,7 +10513,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -10453,10 +10545,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -10470,7 +10562,7 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -10506,13 +10598,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. 
// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -10520,36 +10612,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool 
is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -10575,18 +10667,18 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline 
backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -10608,147 +10700,6 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co } // namespace arm64 } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace arm64 { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_ARM64_STRINGPARSING_H /* end file include/simdjson/arm64/stringparsing.h */ /* begin file include/simdjson/arm64/numberparsing.h */ @@ -10761,7 +10712,7 @@ namespace { // we don't have SSE, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; @@ -10814,7 +10765,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. 
-simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -10829,7 +10780,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -10889,7 +10840,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -11004,7 +10955,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -11112,7 +11063,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -11139,7 +11090,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -11149,7 +11100,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -11177,7 +11128,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -11228,7 +11179,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -11238,7 +11189,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -11274,7 +11225,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -11294,20 +11246,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -11320,13 +11272,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -11470,7 +11422,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -11520,7 +11472,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -11568,7 +11520,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -11618,12 +11570,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -11661,13 +11613,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -11704,24 +11656,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -11733,11 +11685,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -11745,12 +11697,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -11817,19 +11769,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -11837,9 +11789,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -11863,13 +11815,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -11938,18 +11890,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -12016,7 +11968,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -12069,7 +12021,7 @@ using namespace simdjson::dom; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "fallback", "Generic fallback implementation", 0 @@ -12131,10 +12083,11 @@ class dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) 
noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -12203,7 +12156,7 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { #endif /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef _MSC_VER unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -12232,11 +12185,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -12267,7 +12220,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -12299,10 +12252,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { 
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -12316,7 +12269,7 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -12352,13 +12305,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -12366,36 +12319,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { 
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -12419,17 +12372,17 @@ namespace { struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { return c == '"'; } - simdjson_really_inline bool has_backslash() { return c == '\\'; } - simdjson_really_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_really_inline int backslash_index() { return c == '\\' ? 0 : 1; } + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } uint8_t c; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // store to dest unconditionally - we can overwrite the bits we don't like later dst[0] = src[0]; return { src[0] }; @@ -12439,147 +12392,6 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co } // namespace fallback } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_FALLBACK_STRINGPARSING_H /* end file include/simdjson/fallback/stringparsing.h */ /* begin file include/simdjson/fallback/numberparsing.h */ @@ -12597,14 +12409,14 @@ namespace simdjson { namespace fallback { namespace { // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { uint64_t val; memcpy(&val, chars, sizeof(uint64_t)); val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); } -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { return parse_eight_digits_unrolled(reinterpret_cast(chars)); } @@ -12653,7 +12465,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). 
The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -12668,7 +12480,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -12728,7 +12540,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -12843,7 +12655,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -12951,7 +12763,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -12978,7 +12790,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -12988,7 +12800,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -13016,7 +12828,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -13067,7 +12879,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -13077,7 +12889,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -13113,7 +12925,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -13133,20 +12946,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -13159,13 +12972,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -13309,7 +13122,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -13359,7 +13172,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -13407,7 +13220,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -13457,12 +13270,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -13500,13 +13313,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -13543,24 +13356,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -13572,11 +13385,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -13584,12 +13397,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -13656,19 +13469,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -13676,9 +13489,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -13702,13 +13515,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -13777,18 +13590,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -13855,7 +13668,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -13916,7 +13729,7 @@ using namespace simdjson; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "icelake", "Intel/AMD AVX512", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 @@ -13990,6 +13803,8 @@ class implementation final : public simdjson::implementation { #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + #endif // 
SIMDJSON_ICELAKE_INTRINSICS_H /* end file include/simdjson/icelake/intrinsics.h */ @@ -14039,10 +13854,11 @@ class dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -14095,7 +13911,7 @@ namespace { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. 
SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO @@ -14109,27 +13925,27 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else -simdjson_really_inline long long int count_ones(uint64_t input_num) { +simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, @@ -14159,7 +13975,7 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. 
__m128i all_ones = _mm_set1_epi8('\xFF'); @@ -14215,23 +14031,23 @@ namespace simd { __m512i value; // Zero constructor - simdjson_really_inline base() : value{__m512i()} {} + simdjson_inline base() : value{__m512i()} {} // Conversion from SIMD register - simdjson_really_inline base(const __m512i _value) : value(_value) {} + simdjson_inline base(const __m512i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m512i&() const { return this->value; } - simdjson_really_inline operator __m512i&() { return this->value; } + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return 
_mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. @@ -14243,15 +14059,17 @@ namespace simd { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m512i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} - simdjson_really_inline uint64_t operator==(const simd8 other) const { return _mm512_cmpeq_epi8_mask(*this, other); } + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } static const int SIZE = sizeof(base::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { #if SIMDJSON_GCC8 // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) constexpr int shift = 16 - N; @@ -14265,25 +14083,25 @@ namespace simd { // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m512i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} 
// Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_really_inline simd8 load(const T values[64]) { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { return _mm512_loadu_si512(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { @@ -14299,24 +14117,24 @@ namespace simd { ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m512i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return 
_mm512_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm512_shuffle_epi8(lookup_table, *this); } @@ -14328,12 +14146,12 @@ namespace simd { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint64_t mask, L * output) const { + simdjson_inline void compress(uint64_t mask, L * output) const { _mm512_mask_compressstoreu_epi8 (output,~mask,*this); } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -14350,14 +14168,14 @@ namespace simd { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, @@ -14378,7 +14196,7 @@ namespace simd { )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { @@ -14395,24 +14213,24 @@ namespace simd { } // Order-sensitive comparisons - simdjson_really_inline 
simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } }; // Unsigned bytes template<> struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, 
uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, @@ -14433,7 +14251,7 @@ namespace simd { )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -14450,42 +14268,42 @@ namespace simd { } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline uint64_t operator<=(const simd8 other) const { return 
other.max_val(*this) == other; } - simdjson_really_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { + simdjson_inline bool is_ascii() const { return 
_mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return !_mm512_test_epi8_mask(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template - simdjson_really_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } }; template @@ -14498,40 +14316,40 @@ namespace simd { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_really_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(mask, output); return 64 - count_ones(mask); } - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return this->chunks[0]; } - simdjson_really_inline simd8x64 bit_or(const T m) const { + simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask ); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] == mask; } - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + simdjson_inline uint64_t eq(const simd8x64 &other) const { return this->chunks[0] == other.chunks[0]; } - 
simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return this->chunks[0] <= mask; } @@ -14554,11 +14372,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -14589,7 +14407,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -14621,10 +14439,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -14638,7 +14456,7 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 
full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -14674,13 +14492,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -14688,36 +14506,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } 
simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -14743,18 +14561,18 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int 
quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint64_t bs_bits; uint64_t quote_bits; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -14771,147 +14589,6 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co } // namespace icelake } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_ICELAKE_STRINGPARSING_H /* end file include/simdjson/icelake/stringparsing.h */ /* begin file include/simdjson/icelake/numberparsing.h */ @@ -14922,7 +14599,7 @@ namespace simdjson { namespace icelake { namespace { -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -14985,7 +14662,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. 
-simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -15000,7 +14677,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -15060,7 +14737,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -15175,7 +14852,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -15283,7 +14960,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -15310,7 +14987,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -15320,7 +14997,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -15348,7 +15025,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -15399,7 +15076,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -15409,7 +15086,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -15445,7 +15122,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -15465,20 +15143,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -15491,13 +15169,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -15641,7 +15319,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -15691,7 +15369,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -15739,7 +15417,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -15789,12 +15467,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -15832,13 +15510,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -15875,24 +15553,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -15904,11 +15582,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -15916,12 +15594,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -15988,19 +15666,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -16008,9 +15686,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -16034,13 +15712,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -16109,18 +15787,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -16187,7 +15865,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -16249,7 +15927,7 @@ using namespace simdjson; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation( + simdjson_inline implementation() : simdjson::implementation( "haswell", "Intel/AMD AVX2", internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 @@ -16315,6 +15993,8 @@ class implementation final : public simdjson::implementation { #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + #endif // SIMDJSON_HASWELL_INTRINSICS_H /* end file include/simdjson/haswell/intrinsics.h */ @@ -16364,10 +16044,11 @@ class dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, 
size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -16420,7 +16101,7 @@ namespace { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO return (int)_tzcnt_u64(input_num); #else // SIMDJSON_REGULAR_VISUAL_STUDIO @@ -16434,27 +16115,27 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return _blsr_u64(input_num); } /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { return int(_lzcnt_u64(input_num)); } #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else -simdjson_really_inline long long int count_ones(uint64_t 
input_num) { +simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, @@ -16484,7 +16165,7 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processor supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); @@ -16514,23 +16195,23 @@ namespace simd { __m256i value; // Zero constructor - simdjson_really_inline base() : value{__m256i()} {} + simdjson_inline base() : value{__m256i()} {} // Conversion from SIMD register - simdjson_really_inline base(const __m256i _value) : value(_value) {} + simdjson_inline base(const __m256i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m256i&() const { return this->value; } - simdjson_really_inline operator __m256i&() { return this->value; } + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - 
simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. 
@@ -16542,15 +16223,15 @@ namespace simd { typedef uint32_t bitmask_t; typedef uint64_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m256i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); } }; @@ -16558,27 +16239,27 @@ namespace simd { // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m256i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct base8_numeric: base8 { - static simdjson_really_inline 
simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_really_inline simd8 load(const T values[32]) { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { return _mm256_loadu_si256(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { @@ -16590,24 +16271,24 @@ namespace simd { ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m256i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return 
_mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm256_shuffle_epi8(lookup_table, *this); } @@ -16619,7 +16300,7 @@ namespace simd { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_really_inline void compress(uint32_t mask, L * output) const { + simdjson_inline void compress(uint32_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -16665,7 +16346,7 @@ namespace simd { } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -16682,14 +16363,14 @@ namespace simd { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline 
simd8(const int8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, @@ -16701,7 +16382,7 @@ namespace simd { v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { @@ -16714,23 +16395,23 @@ namespace simd { } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const 
__m256i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, @@ -16742,7 +16423,7 @@ namespace simd { v24,v25,v26,v27,v28,v29,v30,v31 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -16755,39 +16436,39 @@ namespace simd { } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - 
simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return 
~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template - simdjson_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } }; template @@ -16800,10 +16481,10 @@ namespace simd { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { uint32_t mask1 = uint32_t(mask); uint32_t mask2 = uint32_t(mask >> 32); this->chunks[0].compress(mask1, output); @@ -16811,22 +16492,22 @@ namespace simd { return 64 - count_ones(mask); } - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); uint64_t r_hi = this->chunks[1].to_bitmask(); return r_lo | (r_hi << 32); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return this->chunks[0] | this->chunks[1]; } - simdjson_really_inline simd8x64 bit_or(const T m) const { + simdjson_inline simd8x64 bit_or(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] | mask, @@ -16834,7 +16515,7 @@ namespace simd { ); } - 
simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, @@ -16842,14 +16523,14 @@ namespace simd { ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1] ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, @@ -16875,11 +16556,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -16910,7 +16591,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -16942,10 +16623,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static 
simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -16959,7 +16640,7 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -16995,13 +16676,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -17009,36 +16690,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { 
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -17064,18 +16745,18 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 15 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -17092,147 +16773,6 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co } // namespace haswell } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every 
implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check 
for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. 
*/ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_HASWELL_STRINGPARSING_H /* end file include/simdjson/haswell/stringparsing.h */ /* begin file include/simdjson/haswell/numberparsing.h */ @@ -17243,7 +16783,7 @@ namespace simdjson { namespace haswell { namespace { -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. 
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -17306,7 +16846,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -17321,7 +16861,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -17381,7 +16921,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -17496,7 +17036,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -17604,7 +17144,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -17631,7 +17171,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -17641,7 +17181,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -17669,7 +17209,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -17720,7 +17260,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -17730,7 +17270,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -17766,7 +17306,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -17786,20 +17327,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -17812,13 +17353,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -17962,7 +17503,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -18012,7 +17553,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -18060,7 +17601,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -18110,12 +17651,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -18153,13 +17694,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -18196,24 +17737,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -18225,11 +17766,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -18237,12 +17778,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -18309,19 +17850,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -18329,9 +17870,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -18355,13 +17896,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -18430,18 +17971,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -18508,7 +18049,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -18561,7 +18102,7 @@ using namespace simdjson::dom; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() + simdjson_inline implementation() : simdjson::implementation("ppc64", "PPC64 ALTIVEC", internal::instruction_set::ALTIVEC) {} simdjson_warn_unused error_code create_dom_parser_implementation( @@ -18623,10 +18164,11 @@ class dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline 
simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -18685,6 +18227,8 @@ inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth( #undef vector #endif +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + #endif // SIMDJSON_PPC64_INTRINSICS_H /* end file include/simdjson/ppc64/intrinsics.h */ /* begin file include/simdjson/ppc64/bitmanipulation.h */ @@ -18699,7 +18243,7 @@ namespace { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) @@ -18712,12 +18256,12 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num - 1); } /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -18732,17 +18276,17 @@ simdjson_really_inline int leading_zeroes(uint64_t input_num) { } #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline int count_ones(uint64_t input_num) { +simdjson_inline int count_ones(uint64_t input_num) { // note: we do not support legacy 
32-bit Windows return __popcnt64(input_num); // Visual Studio wants two underscores } #else -simdjson_really_inline int count_ones(uint64_t input_num) { +simdjson_inline int count_ones(uint64_t input_num) { return __builtin_popcountll(input_num); } #endif -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO *result = value1 + value2; @@ -18773,7 +18317,7 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { // You can use the version below, however gcc sometimes miscompiles // vec_pmsum_be, it happens somewhere around between 8 and 9th version. // The performance boost was not noticeable, falling back to a usual @@ -18820,41 +18364,41 @@ template struct base { __m128i value; // Zero constructor - simdjson_really_inline base() : value{__m128i()} {} + simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m128i &() const { + simdjson_inline operator const __m128i &() const { return this->value; } - simdjson_really_inline operator __m128i &() { return this->value; } + simdjson_inline operator __m128i &() { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { + simdjson_inline Child operator|(const Child other) const { return vec_or(this->value, (__m128i)other); } - simdjson_really_inline Child operator&(const Child other) const { + simdjson_inline Child operator&(const Child other) const { return vec_and(this->value, (__m128i)other); } - simdjson_really_inline Child operator^(const Child other) const { + 
simdjson_inline Child operator^(const Child other) const { return vec_xor(this->value, (__m128i)other); } - simdjson_really_inline Child bit_andnot(const Child other) const { + simdjson_inline Child bit_andnot(const Child other) const { return vec_andc(this->value, (__m128i)other); } - simdjson_really_inline Child &operator|=(const Child other) { + simdjson_inline Child &operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child &operator&=(const Child other) { + simdjson_inline Child &operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child &operator^=(const Child other) { + simdjson_inline Child &operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; @@ -18869,17 +18413,17 @@ struct base8 : base> { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); } static const int SIZE = sizeof(base>::value); template - simdjson_really_inline simd8 prev(simd8 prev_chunk) const { + simdjson_inline simd8 prev(simd8 prev_chunk) const { __m128i chunk = this->value; #ifdef __LITTLE_ENDIAN__ chunk = (__m128i)vec_reve(this->value); @@ -18895,18 +18439,18 @@ struct base8 : base> { // SIMD byte mask type (returned by things like eq and gt) template <> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { + static simdjson_inline simd8 splat(bool _value) { return 
(__m128i)vec_splats((unsigned char)(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { + simdjson_inline int to_bitmask() const { __vector unsigned long long result; const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; @@ -18919,25 +18463,25 @@ template <> struct simd8 : base8 { return static_cast(result[0]); #endif } - simdjson_really_inline bool any() const { + simdjson_inline bool any() const { return !vec_all_eq(this->value, (__m128i)vec_splats(0)); } - simdjson_really_inline simd8 operator~() const { + simdjson_inline simd8 operator~() const { return this->value ^ (__m128i)splat(true); } }; template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T value) { + static simdjson_inline simd8 splat(T value) { (void)value; return (__m128i)vec_splats(value); } - static simdjson_really_inline simd8 zero() { return splat(0); } - static simdjson_really_inline simd8 load(const T values[16]) { + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) { @@ -18945,30 +18489,30 @@ template struct base8_numeric : base8 { v14, v15); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const 
__m128i _value) + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[16]) const { + simdjson_inline void store(T dst[16]) const { vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { + simdjson_inline simd8 operator+(const simd8 other) const { return (__m128i)((__m128i)this->value + (__m128i)other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { + simdjson_inline simd8 operator-(const simd8 other) const { return (__m128i)((__m128i)this->value - (__m128i)other); } - simdjson_really_inline simd8 &operator+=(const simd8 other) { + simdjson_inline simd8 &operator+=(const simd8 other) { *this = *this + other; return *static_cast *>(this); } - simdjson_really_inline simd8 &operator-=(const simd8 other) { + simdjson_inline simd8 &operator-=(const simd8 other) { *this = *this - other; return *static_cast *>(this); } @@ -18976,7 +18520,7 @@ template struct base8_numeric : base8 { // Perform a lookup assuming the value is between 0 and 16 (undefined behavior // for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); } @@ -18987,7 +18531,7 @@ template struct base8_numeric : base8 { // seems like a function with the signature simd8 compress(uint32_t mask) // would be sensible, but the AVX ISA makes this kind of approach difficult. 
template - simdjson_really_inline void compress(uint16_t mask, L *output) const { + simdjson_inline void compress(uint16_t mask, L *output) const { using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; using internal::thintable_epi8; @@ -19026,7 +18570,7 @@ template struct base8_numeric : base8 { } template - simdjson_really_inline simd8 + simdjson_inline simd8 lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, L replace12, L replace13, L replace14, @@ -19040,15 +18584,15 @@ template struct base8_numeric : base8 { // Signed bytes template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const int8_t *values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15) @@ -19056,7 +18600,7 @@ template <> struct simd8 : base8_numeric { v8, v9, v10, v11, v12, v13, v14, v15}) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 + simdjson_inline static simd8 repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, 
int8_t v14, int8_t v15) { @@ -19065,22 +18609,22 @@ template <> struct simd8 : base8_numeric { } // Order-sensitive comparisons - simdjson_really_inline simd8 + simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>(const simd8 other) const { return (__m128i)vec_cmpgt((__vector signed char)this->value, (__vector signed char)(__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<(const simd8 other) const { return (__m128i)vec_cmplt((__vector signed char)this->value, (__vector signed char)(__m128i)other); @@ -19089,22 +18633,22 @@ template <> struct simd8 : base8_numeric { // Unsigned bytes template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline + simdjson_inline simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}) {} // Repeat 16 values as many times as 
necessary (usually for lookup tables) - simdjson_really_inline static simd8 + simdjson_inline static simd8 repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, @@ -19114,82 +18658,82 @@ template <> struct simd8 : base8_numeric { } // Saturated math - simdjson_really_inline simd8 + simdjson_inline simd8 saturating_add(const simd8 other) const { return (__m128i)vec_adds(this->value, (__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 saturating_sub(const simd8 other) const { return (__m128i)vec_subs(this->value, (__m128i)other); } // Order-specific operations - simdjson_really_inline simd8 + simdjson_inline simd8 max_val(const simd8 other) const { return (__m128i)vec_max(this->value, (__m128i)other); } - simdjson_really_inline simd8 + simdjson_inline simd8 min_val(const simd8 other) const { return (__m128i)vec_min(this->value, (__m128i)other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations - 
simdjson_really_inline simd8 bits_not_set() const { + simdjson_inline simd8 bits_not_set() const { return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool bits_not_set_anywhere() const { + simdjson_inline bool bits_not_set_anywhere() const { return vec_all_eq(this->value, (__m128i)vec_splats(0)); } - simdjson_really_inline bool any_bits_set_anywhere() const { + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return vec_all_eq(vec_and(this->value, (__m128i)bits), (__m128i)vec_splats(0)); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template simdjson_really_inline simd8 shr() const { + template simdjson_inline simd8 shr() const { return simd8( (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); } - template simdjson_really_inline simd8 shl() const { + template simdjson_inline simd8 shl() const { return simd8( (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); } @@ -19206,26 +18750,26 @@ template struct simd8x64 { operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + simdjson_inline simd8x64(const simd8 chunk0, const 
simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr + 16), simd8::load(ptr + 32), simd8::load(ptr + 48)} {} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr + sizeof(simd8) * 0); this->chunks[1].store(ptr + sizeof(simd8) * 1); this->chunks[2].store(ptr + sizeof(simd8) * 2); this->chunks[3].store(ptr + sizeof(simd8) * 3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T *output) const { + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); @@ -19236,7 +18780,7 @@ template struct simd8x64 { return 64 - count_ones(mask); } - simdjson_really_inline uint64_t to_bitmask() const { + simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); uint64_t r1 = this->chunks[1].to_bitmask(); uint64_t r2 = this->chunks[2].to_bitmask(); @@ -19244,14 +18788,14 @@ template struct simd8x64 { return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, this->chunks[2] == mask, this->chunks[3] == mask) .to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64(this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], this->chunks[2] == 
other.chunks[2], @@ -19259,7 +18803,7 @@ template struct simd8x64 { .to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, this->chunks[2] <= mask, this->chunks[3] <= mask) @@ -19283,11 +18827,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -19318,7 +18862,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -19350,10 +18894,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -19367,7 +18911,7 @@ static simdjson_really_inline uint64_t 
_umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -19403,13 +18947,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -19417,36 +18961,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { 
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -19472,17 +19016,17 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } @@ -19490,7 +19034,7 @@ struct backslash_and_quote { uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -19517,147 +19061,6 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { } // namespace ppc64 } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace ppc64 { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static 
const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. 
- if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_PPC64_STRINGPARSING_H /* end file include/simdjson/ppc64/stringparsing.h */ /* begin file include/simdjson/ppc64/numberparsing.h */ @@ -19676,7 +19079,7 @@ namespace { // we don't have appropriate instructions, so let us use a scalar function // credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { uint64_t val; std::memcpy(&val, chars, sizeof(uint64_t)); @@ -19737,7 +19140,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. 
// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -19752,7 +19155,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -19812,7 +19215,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -19927,7 +19330,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -20035,7 +19438,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -20062,7 +19465,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -20072,7 +19475,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -20100,7 +19503,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -20151,7 +19554,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -20161,7 +19564,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -20197,7 +19600,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -20217,20 +19621,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -20243,13 +19647,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -20393,7 +19797,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -20443,7 +19847,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -20491,7 +19895,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -20541,12 +19945,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -20584,13 +19988,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -20627,24 +20031,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -20656,11 +20060,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -20668,12 +20072,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -20740,19 +20144,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -20760,9 +20164,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -20786,13 +20190,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -20861,18 +20265,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -20939,7 +20343,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -21004,7 +20408,7 @@ using namespace simdjson::dom; class implementation final : public simdjson::implementation { public: - simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} simdjson_warn_unused error_code create_dom_parser_implementation( size_t capacity, size_t max_length, @@ -21044,7 +20448,7 @@ class implementation final : public simdjson::implementation { #include // for _mm_clmulepi64_si128 #endif - +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); #endif // SIMDJSON_WESTMERE_INTRINSICS_H /* end file include/simdjson/westmere/intrinsics.h */ @@ -21095,10 +20499,11 @@ class 
dom_parser_implementation final : public internal::dom_parser_implementati simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); }; @@ -21151,7 +20556,7 @@ namespace { // but the algorithms do not end up using the returned value. // Sadly, sanitizers are not smart enough to figure it out. SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { +simdjson_inline int trailing_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long ret; // Search the mask data from least significant bit (LSB) @@ -21164,12 +20569,12 @@ simdjson_really_inline int trailing_zeroes(uint64_t input_num) { } /* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } /* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { +simdjson_inline int leading_zeroes(uint64_t input_num) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO unsigned long leading_zero = 0; // Search the mask data from most significant bit (MSB) @@ -21184,17 +20589,17 @@ simdjson_really_inline int leading_zeroes(uint64_t input_num) { } #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO 
-simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { // note: we do not support legacy 32-bit Windows return __popcnt64(input_num);// Visual Studio wants two underscores } #else -simdjson_really_inline long long int count_ones(uint64_t input_num) { +simdjson_inline long long int count_ones(uint64_t input_num) { return _popcnt64(input_num); } #endif -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { #ifdef SIMDJSON_REGULAR_VISUAL_STUDIO return _addcarry_u64(0, value1, value2, @@ -21224,7 +20629,7 @@ namespace { // // For example, prefix_xor(00100100) == 00011100 // -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { // There should be no such thing with a processing supporting avx2 // but not clmul. __m128i all_ones = _mm_set1_epi8('\xFF'); @@ -21253,23 +20658,23 @@ namespace simd { __m128i value; // Zero constructor - simdjson_really_inline base() : value{__m128i()} {} + simdjson_inline base() : value{__m128i()} {} // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} + simdjson_inline base(const __m128i _value) : value(_value) {} // Conversion to SIMD register - simdjson_really_inline operator const __m128i&() const { return this->value; } - simdjson_really_inline operator __m128i&() { return this->value; } + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { 
return _mm_xor_si128(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } }; // Forward-declared so they can be used by splat and friends. 
@@ -21281,15 +20686,15 @@ namespace simd { typedef uint16_t bitmask_t; typedef uint32_t bitmask2_t; - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } static const int SIZE = sizeof(base>::value); template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { + simdjson_inline simd8 prev(const simd8 prev_chunk) const { return _mm_alignr_epi8(*this, prev_chunk, 16 - N); } }; @@ -21297,27 +20702,27 @@ namespace simd { // SIMD byte mask type (returned by things like eq and gt) template<> struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) : base8(_value) {} + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } }; template struct 
base8_numeric: base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_really_inline simd8 load(const T values[16]) { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { return _mm_loadu_si128(reinterpret_cast(values)); } // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( + static simdjson_inline simd8 repeat_16( T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 ) { @@ -21327,24 +20732,24 @@ namespace simd { ); } - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i _value) : base8(_value) {} + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} // Store to array - simdjson_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this 
- other; return *static_cast*>(this); } + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { return _mm_shuffle_epi8(lookup_table, *this); } @@ -21356,7 +20761,7 @@ namespace simd { // signature simd8 compress(uint32_t mask) would be // sensible, but the AVX ISA makes this kind of approach difficult. template - simdjson_really_inline void compress(uint16_t mask, L * output) const { + simdjson_inline void compress(uint16_t mask, L * output) const { using internal::thintable_epi8; using internal::BitsSetTable256mul2; using internal::pshufb_combine_table; @@ -21387,7 +20792,7 @@ namespace simd { } template - simdjson_really_inline simd8 lookup_16( + simdjson_inline simd8 lookup_16( L replace0, L replace1, L replace2, L replace3, L replace4, L replace5, L replace6, L replace7, L replace8, L replace9, L replace10, L replace11, @@ -21404,14 +20809,14 @@ namespace simd { // Signed bytes template<> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} // Array constructor - 
simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) : simd8(_mm_setr_epi8( @@ -21419,7 +20824,7 @@ namespace simd { v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 ) { @@ -21430,23 +20835,23 @@ namespace simd { } // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } }; // Unsigned bytes template<> struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} + simdjson_inline simd8() : 
base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} // Array constructor - simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {} + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} // Member-by-member initialization - simdjson_really_inline simd8( + simdjson_inline simd8( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) : simd8(_mm_setr_epi8( @@ -21454,7 +20859,7 @@ namespace simd { v8, v9, v10,v11,v12,v13,v14,v15 )) {} // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( + simdjson_inline static simd8 repeat_16( uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 ) { @@ -21465,39 +20870,39 @@ namespace simd { } // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return 
_mm_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } template - simdjson_really_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } template - simdjson_really_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } // Get one of the bits and make a bitmask out of it. // e.g. 
value.get_bit<7>() gets the high bit template - simdjson_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } }; template @@ -21510,21 +20915,21 @@ namespace simd { simd8x64& operator=(const simd8& other) = delete; // no assignment allowed simd8x64() = delete; // no default constructor allowed - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - simdjson_really_inline void store(T ptr[64]) const { + simdjson_inline void store(T ptr[64]) const { this->chunks[0].store(ptr+sizeof(simd8)*0); this->chunks[1].store(ptr+sizeof(simd8)*1); this->chunks[2].store(ptr+sizeof(simd8)*2); this->chunks[3].store(ptr+sizeof(simd8)*3); } - simdjson_really_inline simd8 reduce_or() const { + simdjson_inline simd8 reduce_or() const { return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); } - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { this->chunks[0].compress(uint16_t(mask), output); this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); @@ -21532,7 +20937,7 @@ namespace simd { return 64 - count_ones(mask); } - simdjson_really_inline uint64_t to_bitmask() const { + 
simdjson_inline uint64_t to_bitmask() const { uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); uint64_t r1 = this->chunks[1].to_bitmask() ; uint64_t r2 = this->chunks[2].to_bitmask() ; @@ -21540,7 +20945,7 @@ namespace simd { return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); } - simdjson_really_inline uint64_t eq(const T m) const { + simdjson_inline uint64_t eq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] == mask, @@ -21550,7 +20955,7 @@ namespace simd { ).to_bitmask(); } - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { + simdjson_inline uint64_t eq(const simd8x64 &other) const { return simd8x64( this->chunks[0] == other.chunks[0], this->chunks[1] == other.chunks[1], @@ -21559,7 +20964,7 @@ namespace simd { ).to_bitmask(); } - simdjson_really_inline uint64_t lteq(const T m) const { + simdjson_inline uint64_t lteq(const T m) const { const simd8 mask = simd8::splat(m); return simd8x64( this->chunks[0] <= mask, @@ -21586,11 +20991,11 @@ namespace jsoncharutils { // return non-zero if not a structural or whitespace char // zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace_negated[c]; } -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { return internal::structural_or_whitespace[c]; } @@ -21621,7 +21026,7 @@ static inline uint32_t hex_to_u32_nocheck( // // Note: we assume that surrogates are treated separately // -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { if (cp <= 0x7F) { c[0] = uint8_t(cp); return 1; // ascii @@ -21653,10 +21058,10 @@ simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { #ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm // 
this is a slow emulation routine for 32-bit // -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { return x * (uint64_t)y; } -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); @@ -21670,7 +21075,7 @@ static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64 using internal::value128; -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { value128 answer; #if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) #ifdef _M_ARM64 @@ -21706,13 +21111,13 @@ namespace atomparsing { // You might think that using memcpy makes this function expensive, but you'd be wrong. // All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); // to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } // Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. // Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); std::memcpy(&srcval, src, sizeof(uint32_t)); @@ -21720,36 +21125,36 @@ simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_true_atom(src); } else if (len == 4) { return !str4ncmp(src, "true"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { if (len > 5) { return is_valid_false_atom(src); } else if (len == 5) { return !str4ncmp(src+1, "alse"); } else { return false; } } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; } simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { 
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { if (len > 4) { return is_valid_null_atom(src); } else if (len == 4) { return !str4ncmp(src, "null"); } else { return false; } @@ -21774,18 +21179,18 @@ using namespace simd; struct backslash_and_quote { public: static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } uint32_t bs_bits; uint32_t quote_bits; }; // struct backslash_and_quote -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { // this can read up to 31 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); @@ -21804,147 +21209,6 @@ simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(co } // namespace westmere } // namespace simdjson -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to 
be included multiple times and compiled multiple times - -namespace simdjson { -namespace westmere { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside 
the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. 
*/ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - #endif // SIMDJSON_WESTMERE_STRINGPARSING_H /* end file include/simdjson/westmere/stringparsing.h */ /* begin file include/simdjson/westmere/numberparsing.h */ @@ -21955,7 +21219,7 @@ namespace simdjson { namespace westmere { namespace { -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { // this actually computes *16* values so we are being wasteful. 
const __m128i ascii0 = _mm_set1_epi8('0'); const __m128i mul_1_10 = @@ -22018,7 +21282,7 @@ namespace { // Convert a mantissa, an exponent and a sign bit into an ieee64 double. // The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). // The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { double d; mantissa &= ~(1ULL << 52); mantissa |= real_exponent << 52; @@ -22033,7 +21297,7 @@ simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponen // set to false. This should work *most of the time* (like 99% of the time). // We assume that power is in the [smallest_power, // largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { // we start with a fast path // It was described in // Clinger WD. How to read floating point numbers accurately. @@ -22093,7 +21357,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg // In the slow path, we need to adjust i so that it is > 1<<63 which is always // possible, except if i == 0, so we handle i == 0 separately. if(i == 0) { - d = 0.0; + d = negative ? -0.0 : 0.0; return true; } @@ -22208,7 +21472,7 @@ simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool neg if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? // Here have that real_exponent <= 0 so -real_exponent >= 0 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; + d = negative ? 
-0.0 : 0.0; return true; } // next line is safe because -real_exponent + 1 < 0 @@ -22316,7 +21580,7 @@ static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, dou // check quickly whether the next 8 chars are made of digits // at a glance, it looks better than Mula's // http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { uint64_t val; // this can read up to 7 bytes beyond the buffer size, but we require // SIMDJSON_PADDING of padding @@ -22343,7 +21607,7 @@ error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { template SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { +simdjson_inline bool parse_digit(const uint8_t c, I &i) { const uint8_t digit = static_cast(c - '0'); if (digit > 9) { return false; @@ -22353,7 +21617,7 @@ simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { return true; } -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { // we continue with the fiction that we have an integer. 
If the // floating point number is representable as x * 10^z for some integer // z that fits in 53 bits, then we will be able to convert back the @@ -22381,7 +21645,7 @@ simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *c return SUCCESS; } -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { // Exp Sign: -123.456e[-]78 bool neg_exp = ('-' == *p); if (neg_exp || '+' == *p) { p++; } // Skip + as well @@ -22432,7 +21696,7 @@ simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t * return SUCCESS; } -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const uint8_t *start = start_digits; @@ -22442,7 +21706,7 @@ simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, s } template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { // If we frequently had to deal with long strings of digits, // we could extend our code by using a 128-bit integer instead // of a 64-bit integer. However, this is uncommon in practice. 
@@ -22478,7 +21742,8 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); // if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); return SUCCESS; } else { // (exponent > largest_power) and (i != 0) // We have, for sure, an infinite value and simdjson refuses to parse infinite values. @@ -22498,20 +21763,20 @@ simdjson_really_inline error_code write_float(const uint8_t *const src, bool neg #ifdef SIMDJSON_SKIPNUMBERPARSING template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { writer.append_s64(0); // always write zero return SUCCESS; // always succeeds } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused 
simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } #else // parse the number at src @@ -22524,13 +21789,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge // // Our objective is accurate parsing (ULP of 0) at high speed. template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -22674,7 +21939,7 @@ const uint8_t integer_string_finisher[256] = { NUMBER_ERROR}; // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -22724,7 +21989,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( // Parse any number from 0 to 18,446,744,073,709,551,615 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { const uint8_t *p = src; // // Parse the integer part. @@ -22772,7 +22037,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned( } // Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { const uint8_t *p = src + 1; // // Parse the integer part. @@ -22822,12 +22087,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_ } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. 
@@ -22865,13 +22130,13 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { // // Check for minus sign // if(src == src_end) { return NUMBER_ERROR; } bool negative = (*src == '-'); - const uint8_t *p = src + negative; + const uint8_t *p = src + uint8_t(negative); // // Parse the integer part. @@ -22908,24 +22173,24 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer(co } // Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. // // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; + const uint8_t *const start_digits = src; uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } + while (parse_digit(*src, i)) { src++; } // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); + size_t digit_count = size_t(src - start_digits); // We go from // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 // so we can never represent numbers that have more than 19 digits. 
@@ -22937,11 +22202,11 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in // Here digit_count > 0. if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; // } // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } + if(*src != '"') { return NUMBER_ERROR; } // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. // Performance note: This check is only needed when digit_count == longest_digit_count but it is // so cheap that we might as well always make it. @@ -22949,12 +22214,12 @@ simdjson_unused simdjson_really_inline simdjson_result parse_integer_in return negative ? (~i+1) : i; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -23021,19 +22286,19 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return (*src == '-'); } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -23041,9 +22306,9 @@ simdjson_unused simdjson_really_inline simdjson_result is_integer(const ui return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); const uint8_t *p = src; while(static_cast(*p - '0') <= 9) { p++; } if ( p == src ) { return NUMBER_ERROR; } @@ -23067,13 +22332,13 @@ simdjson_unused simdjson_really_inline simdjson_result ge } // Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { if(src == src_end) { return NUMBER_ERROR; } // // Check for minus sign // bool negative = (*src == '-'); - src += negative; + src += uint8_t(negative); // // Parse the integer part. 
@@ -23142,18 +22407,18 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double(cons if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, src_end, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { return NUMBER_ERROR; } return d; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { // // Check for minus sign // bool negative = (*(src + 1) == '-'); - src += negative + 1; + src += uint8_t(negative) + 1; // // Parse the integer part. @@ -23220,7 +22485,7 @@ simdjson_unused simdjson_really_inline simdjson_result parse_double_in_s if (simdjson_likely(!overflow)) { if (compute_float_64(exponent, i, negative, d)) { return d; } } - if (!parse_float_fallback(src-negative, &d)) { + if (!parse_float_fallback(src - uint8_t(negative), &d)) { return NUMBER_ERROR; } return d; @@ -23309,22 +22574,22 @@ struct implementation_simdjson_result_base { /** * Create a new empty result with error = UNINITIALIZED. */ - simdjson_really_inline implementation_simdjson_result_base() noexcept = default; + simdjson_inline implementation_simdjson_result_base() noexcept = default; /** * Create a new error result. */ - simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept; + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; /** * Create a new successful result. */ - simdjson_really_inline implementation_simdjson_result_base(T &&value) noexcept; + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; /** * Create a new result with both things (use if you don't want to branch when creating the result). 
*/ - simdjson_really_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; /** * Move the value and the error to the provided variables. @@ -23332,19 +22597,19 @@ struct implementation_simdjson_result_base { * @param value The variable to assign the value to. May not be set if there is an error. * @param error The variable to assign the error to. Set to SUCCESS if there is no error. */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; + simdjson_inline void tie(T &value, error_code &error) && noexcept; /** * Move the value to the provided variable. * * @param value The variable to assign the value to. May not be set if there is an error. */ - simdjson_really_inline error_code get(T &value) && noexcept; + simdjson_inline error_code get(T &value) && noexcept; /** * The error. */ - simdjson_really_inline error_code error() const noexcept; + simdjson_inline error_code error() const noexcept; #if SIMDJSON_EXCEPTIONS @@ -23353,28 +22618,28 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T& value() & noexcept(false); + simdjson_inline T& value() & noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& value() && noexcept(false); + simdjson_inline T&& value() && noexcept(false); /** * Take the result value (move it). * * @throw simdjson_error if there was an error. */ - simdjson_really_inline T&& take_value() && noexcept(false); + simdjson_inline T&& take_value() && noexcept(false); /** * Cast to the value (will throw on error). * * @throw simdjson_error if there was an error. 
*/ - simdjson_really_inline operator T&&() && noexcept(false); + simdjson_inline operator T&&() && noexcept(false); #endif // SIMDJSON_EXCEPTIONS @@ -23383,17 +22648,17 @@ struct implementation_simdjson_result_base { * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline const T& value_unsafe() const& noexcept; + simdjson_inline const T& value_unsafe() const& noexcept; /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T& value_unsafe() & noexcept; + simdjson_inline T& value_unsafe() & noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. */ - simdjson_really_inline T&& value_unsafe() && noexcept; + simdjson_inline T&& value_unsafe() && noexcept; protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -23457,46 +22722,46 @@ struct number { * unsigned_integer /// a positive integer larger or equal to 1<<63 * }; */ - simdjson_really_inline number_type get_number_type() const noexcept; + simdjson_inline number_type get_number_type() const noexcept; /** * return true if the automatically determined type of * the number is number_type::unsigned_integer. */ - simdjson_really_inline bool is_uint64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; /** * return the value as a uint64_t, only valid if is_uint64() is true. */ - simdjson_really_inline uint64_t get_uint64() const noexcept; - simdjson_really_inline operator uint64_t() const noexcept; + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::signed_integer. 
*/ - simdjson_really_inline bool is_int64() const noexcept; + simdjson_inline bool is_int64() const noexcept; /** * return the value as a int64_t, only valid if is_int64() is true. */ - simdjson_really_inline int64_t get_int64() const noexcept; - simdjson_really_inline operator int64_t() const noexcept; + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; /** * return true if the automatically determined type of * the number is number_type::floating_point_number. */ - simdjson_really_inline bool is_double() const noexcept; + simdjson_inline bool is_double() const noexcept; /** * return the value as a double, only valid if is_double() is true. */ - simdjson_really_inline double get_double() const noexcept; - simdjson_really_inline operator double() const noexcept; + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; /** * Convert the number to a double. Though it always succeed, the conversion * may be lossy if the number cannot be represented exactly. */ - simdjson_really_inline double as_double() const noexcept; + simdjson_inline double as_double() const noexcept; protected: @@ -23513,13 +22778,13 @@ struct number { template friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); /** Store a signed 64-bit value to the number. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; + simdjson_inline void append_s64(int64_t value) noexcept; /** Store an unsigned 64-bit value to the number. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; + simdjson_inline void append_u64(uint64_t value) noexcept; /** Store a double value to the number. */ - simdjson_really_inline void append_double(double value) noexcept; + simdjson_inline void append_double(double value) noexcept; /** Specifies that the value is a double, but leave it undefined. 
*/ - simdjson_really_inline void skip_double() noexcept; + simdjson_inline void skip_double() noexcept; /** * End of friend declarations. */ @@ -23567,10 +22832,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // namespace simdjson @@ -23648,12 +22913,19 @@ class json_iterator; * * This class is deliberately simplistic and has little functionality. You can * compare a raw_json_string instance with an unescaped C string, but - * that is pretty much all you can do. + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. 
You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). * - * They originate typically from field instance which in turn represent key-value pairs from - * object instances. From a field instance, you get the raw_json_string instance by calling key(). - * You can, if you want a more usable string_view instance, call the unescaped_key() method - * on the field instance. */ class raw_json_string { public: @@ -23662,7 +22934,7 @@ class raw_json_string { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline raw_json_string() noexcept = default; + simdjson_inline raw_json_string() noexcept = default; /** * Create a new invalid raw_json_string pointed at the given location in the JSON. @@ -23671,14 +22943,14 @@ class raw_json_string { * * It *must* be terminated by a ", and be a valid JSON string. */ - simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; /** * Get the raw pointer to the beginning of the string in the JSON (just after the "). * * It is possible for this function to return a null pointer if the instance * has outlived its existence. */ - simdjson_really_inline const char * raw() const noexcept; + simdjson_inline const char * raw() const noexcept; /** * This compares the current instance to the std::string_view target: returns true if @@ -23694,7 +22966,7 @@ class raw_json_string { * Performance: the comparison may be done using memcmp which may be efficient * for long strings. 
*/ - simdjson_really_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if @@ -23711,7 +22983,7 @@ class raw_json_string { * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ - simdjson_really_inline bool unsafe_is_equal(std::string_view target) const noexcept; + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if @@ -23725,27 +22997,27 @@ class raw_json_string { * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); * s.unsafe_is_equal(target); */ - simdjson_really_inline bool unsafe_is_equal(const char* target) const noexcept; + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done). */ - simdjson_really_inline bool is_equal(std::string_view target) const noexcept; + simdjson_inline bool is_equal(std::string_view target) const noexcept; /** * This compares the current instance to the C string target: returns true if * they are byte-by-byte equal (no escaping is done). */ - simdjson_really_inline bool is_equal(const char* target) const noexcept; + simdjson_inline bool is_equal(const char* target) const noexcept; /** * Returns true if target is free from unescaped quote. If target is known at * compile-time, we might expect the computation to happen at compile time with * many compilers (not all!). 
*/ - static simdjson_really_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; - static simdjson_really_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; private: @@ -23754,27 +23026,13 @@ class raw_json_string { * This will set the inner pointer to zero, effectively making * this instance unusable. */ - simdjson_really_inline void consume() noexcept { buf = nullptr; } + simdjson_inline void consume() noexcept { buf = nullptr; } /** * Checks whether the inner pointer is non-null and thus usable. */ - simdjson_really_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } - /** - * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * - * ## IMPORTANT: string_view lifetime - * - * The string_view is only valid as long as the bytes in dst. - * - * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. - * dst will be updated to the next unused location (just after the \0 written out at - * the end of this string). - * @return A string_view pointing at the unescaped string in dst - * @error STRING_ERROR if escapes are incorrect. - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; /** * Unescape this JSON string, replacing \\ with \, \n with newline, etc. * @@ -23784,24 +23042,25 @@ class raw_json_string { * * @param iter A json_iterator, which contains a buffer where the string will be written. 
*/ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; const uint8_t * buf{}; friend class object; friend class field; + friend class parser; friend struct simdjson_result; }; -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; /** * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. */ -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; -simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; } // namespace ondemand @@ -23813,14 +23072,13 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private - 
simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result raw() const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; }; } // namespace simdjson @@ -23843,20 +23101,20 @@ class token_iterator { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline token_iterator() noexcept = default; - simdjson_really_inline token_iterator(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator(const token_iterator &other) noexcept = default; - simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; /** * Advance to the next token (returning the current one). */ - simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + simdjson_inline const uint8_t *return_current_and_advance() noexcept; /** * Reports the current offset in bytes from the start of the underlying buffer. */ - simdjson_really_inline uint32_t current_offset() const noexcept; + simdjson_inline uint32_t current_offset() const noexcept; /** * Get the JSON text for a given token (relative). * @@ -23868,7 +23126,7 @@ class token_iterator { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for a given token. * @@ -23877,7 +23135,7 @@ class token_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. 
*/ - simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get the JSON text for a given token. @@ -23887,7 +23145,7 @@ class token_iterator { * @param position The position of the token. * */ - simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for a given token. * @@ -23895,29 +23153,29 @@ class token_iterator { * * @param position The position of the token. */ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Return the current index. */ - simdjson_really_inline token_position position() const noexcept; + simdjson_inline token_position position() const noexcept; /** * Reset to a previously saved index. */ - simdjson_really_inline void set_position(token_position target_position) noexcept; + simdjson_inline void set_position(token_position target_position) noexcept; // NOTE: we don't support a full C++ iterator interface, because we expect people to make // different calls to advance the iterator based on *their own* state. 
- simdjson_really_inline bool operator==(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; protected: - simdjson_really_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; /** * Get the index of the JSON text for a given token (relative). @@ -23927,7 +23185,7 @@ class token_iterator { * @param delta The relative position of the token to retrieve. e.g. 0 = current token, * 1 = next token, -1 = prev token. */ - simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; /** * Get the index of the JSON text for a given token. * @@ -23936,7 +23194,7 @@ class token_iterator { * @param position The position of the token. 
* */ - simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; + simdjson_inline uint32_t peek_index(token_position position) const noexcept; const uint8_t *buf{}; token_position _position{}; @@ -23944,8 +23202,8 @@ class token_iterator { friend class json_iterator; friend class value_iterator; friend class object; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; }; } // namespace ondemand @@ -23957,10 +23215,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private }; } // 
namespace simdjson @@ -24029,20 +23287,20 @@ class json_iterator { bool _streaming{false}; public: - simdjson_really_inline json_iterator() noexcept = default; - simdjson_really_inline json_iterator(json_iterator &&other) noexcept; - simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept; - simdjson_really_inline explicit json_iterator(const json_iterator &other) noexcept = default; - simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; /** * Skips a JSON value, whether it is a scalar, array or object. */ - simdjson_warn_unused simdjson_really_inline error_code skip_child(depth_t parent_depth) noexcept; + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; /** * Tell whether the iterator is still at the start */ - simdjson_really_inline bool at_root() const noexcept; + simdjson_inline bool at_root() const noexcept; /** * Tell whether we should be expected to run in streaming @@ -24050,53 +23308,61 @@ class json_iterator { * that does not affect how the iterator works. It is used by * start_root_array() and start_root_object(). 
*/ - simdjson_really_inline bool streaming() const noexcept; + simdjson_inline bool streaming() const noexcept; /** * Get the root value iterator */ - simdjson_really_inline token_position root_position() const noexcept; + simdjson_inline token_position root_position() const noexcept; /** * Assert that we are at the document depth (== 1) */ - simdjson_really_inline void assert_at_document_depth() const noexcept; + simdjson_inline void assert_at_document_depth() const noexcept; /** * Assert that we are at the root of the document */ - simdjson_really_inline void assert_at_root() const noexcept; + simdjson_inline void assert_at_root() const noexcept; /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_end() const noexcept; + simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is live (has not been moved). */ - simdjson_really_inline bool is_alive() const noexcept; + simdjson_inline bool is_alive() const noexcept; /** * Abandon this iterator, setting depth to 0 (as if the document is finished). */ - simdjson_really_inline void abandon() noexcept; + simdjson_inline void abandon() noexcept; /** * Advance the current token without modifying depth. */ - simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; /** * Assert that there are at least the given number of tokens left. * * Has no effect in release builds. */ - simdjson_really_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; /** * Assert that the given position addresses an actual token (is within bounds). 
* * Has no effect in release builds. */ - simdjson_really_inline void assert_valid_position(token_position position) const noexcept; + simdjson_inline void assert_valid_position(token_position position) const noexcept; /** * Get the JSON text for a given token (relative). * @@ -24107,7 +23373,7 @@ class json_iterator { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * @@ -24115,7 +23381,7 @@ class json_iterator { * * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. */ - simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; /** * Get a pointer to the current location in the input buffer. * @@ -24124,7 +23390,7 @@ class json_iterator { * You may be pointing outside of the input buffer: it is not generally * safe to dereference this pointer. */ - simdjson_really_inline const uint8_t *unsafe_pointer() const noexcept; + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; /** * Get the JSON text for a given token. * @@ -24135,7 +23401,7 @@ class json_iterator { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; + simdjson_inline const uint8_t *peek(token_position position) const noexcept; /** * Get the maximum length of the JSON text for the current token (or relative). * @@ -24143,7 +23409,7 @@ class json_iterator { * * @param position The position of the token to retrieve. 
*/ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; + simdjson_inline uint32_t peek_length(token_position position) const noexcept; /** * Get the JSON text for the last token in the document. * @@ -24152,7 +23418,7 @@ class json_iterator { * TODO consider a string_view, assuming the length will get stripped out by the optimizer when * it isn't used ... */ - simdjson_really_inline const uint8_t *peek_last() const noexcept; + simdjson_inline const uint8_t *peek_last() const noexcept; /** * Ascend one level. @@ -24161,7 +23427,7 @@ class json_iterator { * * @param parent_depth the expected parent depth. */ - simdjson_really_inline void ascend_to(depth_t parent_depth) noexcept; + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; /** * Descend one level. @@ -24170,18 +23436,18 @@ class json_iterator { * * @param child_depth the expected child depth. */ - simdjson_really_inline void descend_to(depth_t child_depth) noexcept; - simdjson_really_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; /** * Get current depth. */ - simdjson_really_inline depth_t depth() const noexcept; + simdjson_inline depth_t depth() const noexcept; /** * Get current (writeable) location in the string buffer. */ - simdjson_really_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; /** * Report an unrecoverable error, preventing further iteration. @@ -24189,23 +23455,31 @@ class json_iterator { * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. 
*/ - simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; /** * Log error, but don't stop iteration. * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. * @param message An error message to report with the error. */ - simdjson_really_inline error_code optional_error(error_code error, const char *message) noexcept; + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; - template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; + template simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; - simdjson_really_inline token_position position() const noexcept; - simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - simdjson_really_inline token_position start_position(depth_t depth) const noexcept; - simdjson_really_inline void set_start_position(depth_t depth, token_position position) noexcept; + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; #endif + /* Useful for debugging and logging purposes. 
*/ inline std::string to_string() const noexcept; @@ -24219,14 +23493,21 @@ class json_iterator { * as if it had just been created. */ inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; protected: - simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; /// The last token before the end - simdjson_really_inline token_position last_position() const noexcept; + simdjson_inline token_position last_position() const noexcept; /// The token *at* the end. This points at gibberish and should only be used for comparison. - simdjson_really_inline token_position end_position() const noexcept; + simdjson_inline token_position end_position() const noexcept; /// The end of the buffer. 
- simdjson_really_inline token_position end() const noexcept; + simdjson_inline token_position end() const noexcept; friend class document; friend class document_stream; @@ -24236,8 +23517,8 @@ class json_iterator { friend class raw_json_string; friend class parser; friend class value_iterator; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; }; // json_iterator } // namespace ondemand @@ -24249,10 +23530,10 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -24289,61 +23570,61 @@ class value_iterator { token_position _start_position{}; public: - simdjson_really_inline value_iterator() noexcept = default; + simdjson_inline 
value_iterator() noexcept = default; /** * Denote that we're starting a document. */ - simdjson_really_inline void start_document() noexcept; + simdjson_inline void start_document() noexcept; /** * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. * * Optimized for scalars. */ - simdjson_warn_unused simdjson_really_inline error_code skip_child() noexcept; + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; /** * Tell whether the iterator is at the EOF mark */ - simdjson_really_inline bool at_end() const noexcept; + simdjson_inline bool at_end() const noexcept; /** * Tell whether the iterator is at the start of the value */ - simdjson_really_inline bool at_start() const noexcept; + simdjson_inline bool at_start() const noexcept; /** * Tell whether the value is open--if the value has not been used, or the array/object is still open. */ - simdjson_really_inline bool is_open() const noexcept; + simdjson_inline bool is_open() const noexcept; /** * Tell whether the value is at an object's first field (just after the {). */ - simdjson_really_inline bool at_first_field() const noexcept; + simdjson_inline bool at_first_field() const noexcept; /** * Abandon all iteration. */ - simdjson_really_inline void abandon() noexcept; + simdjson_inline void abandon() noexcept; /** * Get the child value as a value_iterator. */ - simdjson_really_inline value_iterator child_value() const noexcept; + simdjson_inline value_iterator child_value() const noexcept; /** * Get the depth of this value. */ - simdjson_really_inline int32_t depth() const noexcept; + simdjson_inline int32_t depth() const noexcept; /** * Get the JSON type of this value. * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
*/ - simdjson_really_inline simdjson_result type() const noexcept; + simdjson_inline simdjson_result type() const noexcept; /** * @addtogroup object Object iteration @@ -24360,7 +23641,7 @@ class value_iterator { * @returns Whether the object had any fields (returns false for empty). * @error INCORRECT_TYPE if there is no opening { */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; /** * Start an object iteration from the root. * @@ -24368,7 +23649,7 @@ class value_iterator { * @error INCORRECT_TYPE if there is no opening { * @error TAPE_ERROR if there is no matching } at end of document */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; /** * Start an object iteration after the user has already checked and moved past the {. @@ -24379,7 +23660,7 @@ class value_iterator { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** * Start an object iteration from the root, after the user has already checked and moved past the {. * @@ -24389,7 +23670,7 @@ class value_iterator { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; /** * Moves to the next field in an object. @@ -24401,17 +23682,17 @@ class value_iterator { * @error TAPE_ERROR If there is a comma missing between fields. 
* @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; /** * Get the current field's key. */ - simdjson_warn_unused simdjson_really_inline simdjson_result field_key() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; /** * Pass the : in the field and move to its value. */ - simdjson_warn_unused simdjson_really_inline error_code field_value() noexcept; + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; /** * Find the next field with the given key. @@ -24429,7 +23710,7 @@ class value_iterator { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ - simdjson_warn_unused simdjson_really_inline error_code find_field(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; /** * Find the next field with the given key, *without* unescaping. This assumes object order: it @@ -24448,7 +23729,7 @@ class value_iterator { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). */ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; /** * Find the field with the given key without regard to order, and *without* unescaping. @@ -24468,7 +23749,7 @@ class value_iterator { * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may * fail to match some keys with escapes (\u, \n, etc.). 
*/ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; /** @} */ @@ -24485,7 +23766,7 @@ class value_iterator { * @returns Whether the array had any elements (returns false for empty). * @error INCORRECT_TYPE If there is no [. */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; /** * Check for an opening [ and start an array iteration while at the root. * @@ -24493,7 +23774,7 @@ class value_iterator { * @error INCORRECT_TYPE If there is no [. * @error TAPE_ERROR if there is no matching ] at end of document */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; /** * Start an array iteration, after the user has already checked and moved past the [. @@ -24504,7 +23785,7 @@ class value_iterator { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** * Start an array iteration from the root, after the user has already checked and moved past the [. * @@ -24514,7 +23795,7 @@ class value_iterator { * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* * array or object is incomplete). */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; /** * Moves to the next element in an array. 
@@ -24525,12 +23806,12 @@ class value_iterator { * @return Whether there is another element in the array. * @error TAPE_ERROR If there is a comma missing between elements. */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_element() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; /** * Get a child value iterator. */ - simdjson_warn_unused simdjson_really_inline value_iterator child() const noexcept; + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; /** @} */ @@ -24540,43 +23821,43 @@ class value_iterator { * @{ */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_negative() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; - - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; - 
simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_root_negative() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result is_root_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number() noexcept; - simdjson_really_inline bool is_root_null() noexcept; - - simdjson_really_inline error_code error() const noexcept; - simdjson_really_inline uint8_t *&string_buf_loc() noexcept; - simdjson_really_inline const json_iterator &json_iter() const noexcept; - simdjson_really_inline json_iterator &json_iter() noexcept; - - simdjson_really_inline void assert_is_valid() const noexcept; - simdjson_really_inline bool is_valid() const noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool() noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number() noexcept; + simdjson_inline bool is_root_null() noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void 
assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; /** @} */ protected: @@ -24584,12 +23865,12 @@ class value_iterator { * Restarts an array iteration. * @returns Whether the array has any elements (returns false for empty). */ - simdjson_really_inline simdjson_result reset_array() noexcept; + simdjson_inline simdjson_result reset_array() noexcept; /** * Restarts an object iteration. * @returns Whether the object has any fields (returns false for empty). */ - simdjson_really_inline simdjson_result reset_object() noexcept; + simdjson_inline simdjson_result reset_object() noexcept; /** * move_at_start(): moves us so that we are pointing at the beginning of * the container. It updates the index so that at_start() is true and it @@ -24597,7 +23878,7 @@ class value_iterator { * * Usage: used with value::count_elements(). **/ - simdjson_really_inline void move_at_start() noexcept; + simdjson_inline void move_at_start() noexcept; /** * move_at_container_start(): moves us so that we are pointing at the beginning of @@ -24605,15 +23886,15 @@ class value_iterator { * * Usage: used with reset_array() and reset_object(). **/ - simdjson_really_inline void move_at_container_start() noexcept; + simdjson_inline void move_at_container_start() noexcept; /* Useful for debugging and logging purposes. 
*/ inline std::string to_string() const noexcept; - simdjson_really_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; - simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; - simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_really_inline const uint8_t *peek_start() const noexcept; - simdjson_really_inline uint32_t peek_start_length() const noexcept; + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; /** * The general idea of the advance_... methods and the peek_* methods @@ -24644,17 +23925,17 @@ class value_iterator { * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
*/ - simdjson_really_inline void advance_scalar(const char *type) noexcept; - simdjson_really_inline void advance_root_scalar(const char *type) noexcept; - simdjson_really_inline void advance_non_root_scalar(const char *type) noexcept; + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; - simdjson_really_inline const uint8_t *peek_scalar(const char *type) noexcept; - simdjson_really_inline const uint8_t *peek_root_scalar(const char *type) noexcept; - simdjson_really_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - simdjson_really_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_really_inline error_code end_container() noexcept; + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; /** * Advance to a place expecting a value (increasing depth). @@ -24662,19 +23943,19 @@ class value_iterator { * @return The current token (the one left behind). * @error TAPE_ERROR If the document ended early. 
*/ - simdjson_really_inline simdjson_result advance_to_value() noexcept; + simdjson_inline simdjson_result advance_to_value() noexcept; - simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; - simdjson_really_inline bool is_at_start() const noexcept; + simdjson_inline bool is_at_start() const noexcept; /** * is_at_iterator_start() returns true on an array or object after it has just been * created, whether the instance is empty or not. * * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) */ - simdjson_really_inline bool is_at_iterator_start() const noexcept; + simdjson_inline bool is_at_iterator_start() const noexcept; /** * Assuming that we are within an object, this returns true if we @@ -24683,7 +23964,7 @@ class value_iterator { * Usage: the skip_child() method should never be used while we are pointing * at a key inside an object. 
*/ - simdjson_really_inline bool is_at_key() const noexcept; + simdjson_inline bool is_at_key() const noexcept; inline void assert_at_start() const noexcept; inline void assert_at_container_start() const noexcept; @@ -24693,16 +23974,16 @@ class value_iterator { inline void assert_at_non_root_start() const noexcept; /** Get the starting position of this value */ - simdjson_really_inline token_position start_position() const noexcept; + simdjson_inline token_position start_position() const noexcept; /** @copydoc error_code json_iterator::position() const noexcept; */ - simdjson_really_inline token_position position() const noexcept; + simdjson_inline token_position position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position last_position() const noexcept; + simdjson_inline token_position last_position() const noexcept; /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position end_position() const noexcept; + simdjson_inline token_position end_position() const noexcept; /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; friend class document; friend class object; @@ -24719,9 +24000,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator 
&&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -24747,7 +24028,7 @@ class document; class array_iterator { public: /** Create a new, invalid array iterator. */ - simdjson_really_inline array_iterator() noexcept = default; + simdjson_inline array_iterator() noexcept = default; // // Iterator interface @@ -24758,7 +24039,7 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -24766,7 +24047,7 @@ class array_iterator { * * @return true if there are no more elements in the JSON array. */ - simdjson_really_inline bool operator==(const array_iterator &) const noexcept; + simdjson_inline bool operator==(const array_iterator &) const noexcept; /** * Check if there are more elements in the JSON array. * @@ -24774,18 +24055,18 @@ class array_iterator { * * @return true if there are more elements in the JSON array. */ - simdjson_really_inline bool operator!=(const array_iterator &) const noexcept; + simdjson_inline bool operator!=(const array_iterator &) const noexcept; /** * Move to the next element. * * Part of the std::iterator interface. 
*/ - simdjson_really_inline array_iterator &operator++() noexcept; + simdjson_inline array_iterator &operator++() noexcept; private: value_iterator iter{}; - simdjson_really_inline array_iterator(const value_iterator &iter) noexcept; + simdjson_inline array_iterator(const value_iterator &iter) noexcept; friend class array; friend class value; @@ -24801,18 +24082,18 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; // // Iterator interface // - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_really_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson @@ -24832,7 +24113,7 @@ class object_iterator { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline object_iterator() noexcept = default; + simdjson_inline object_iterator() noexcept = default; // // Iterator interface @@ -24840,13 +24121,13 @@ class object_iterator { // Reads key and value, yielding them to the user. // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline simdjson_result operator*() noexcept; + simdjson_inline simdjson_result operator*() noexcept; // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const object_iterator &) const noexcept; + simdjson_inline bool operator==(const object_iterator &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const object_iterator &) const noexcept; + simdjson_inline bool operator!=(const object_iterator &) const noexcept; // Checks for ']' and ',' - simdjson_really_inline object_iterator &operator++() noexcept; + simdjson_inline object_iterator &operator++() noexcept; private: /** @@ -24857,7 +24138,7 @@ class object_iterator { */ value_iterator iter{}; - simdjson_really_inline object_iterator(const value_iterator &iter) noexcept; + simdjson_inline object_iterator(const value_iterator &iter) noexcept; friend struct simdjson_result; friend class object; }; @@ -24871,22 +24152,22 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline 
simdjson_result() noexcept = default; // // Iterator interface // // Reads key and value, yielding them to the user. - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator==(const simdjson_result &) const noexcept; // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; // Checks for ']' and ',' - simdjson_really_inline simdjson_result &operator++() noexcept; + simdjson_inline simdjson_result &operator++() noexcept; }; } // namespace simdjson @@ -24910,20 +24191,20 @@ class array { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline array() noexcept = default; + simdjson_inline array() noexcept = default; /** * Begin array iteration. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result begin() noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result end() noexcept; /** * This method scans the array and counts the number of elements. * The count_elements method should always be called before you have begun @@ -24938,7 +24219,7 @@ class array { * To check that an array is empty, it is more performant to use * the is_empty() method. 
*/ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the beginning of the array and checks whether the * array is empty. @@ -24948,7 +24229,7 @@ class array { * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ - simdjson_really_inline simdjson_result is_empty() & noexcept; + simdjson_inline simdjson_result is_empty() & noexcept; /** * Reset the iterator so that we are pointing back at the * beginning of the array. You should still consume values only once even if you @@ -24993,7 +24274,7 @@ class array { * Consumes the array and returns a string_view instance corresponding to the * array as represented in JSON. It points inside the original document. */ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; /** * Get the value at the given index. This function has linear-time complexity. @@ -25002,12 +24283,12 @@ class array { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; protected: /** * Go to the end of the array, no matter where you are right now. */ - simdjson_really_inline error_code consume() noexcept; + simdjson_inline error_code consume() noexcept; /** * Begin array iteration. @@ -25016,7 +24297,7 @@ class array { * resulting array. * @error INCORRECT_TYPE if the iterator is not at [. */ - static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; /** * Begin array iteration from the root. * @@ -25025,7 +24306,7 @@ class array { * @error INCORRECT_TYPE if the iterator is not at [. 
* @error TAPE_ERROR if there is no closing ] at the end of the document. */ - static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; /** * Begin array iteration. * @@ -25034,7 +24315,7 @@ class array { * * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. */ - static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; /** * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. @@ -25043,7 +24324,7 @@ class array { * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* * into the resulting array. */ - simdjson_really_inline array(const value_iterator &iter) noexcept; + simdjson_inline array(const value_iterator &iter) noexcept; /** * Iterator marking current position. 
@@ -25068,17 +24349,17 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; inline simdjson_result count_elements() & noexcept; inline simdjson_result is_empty() & noexcept; inline simdjson_result reset() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -25111,11 +24392,11 @@ class document { * * Exists so you can declare a variable and later assign to it before use. 
*/ - simdjson_really_inline document() noexcept = default; - simdjson_really_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy - simdjson_really_inline document(document &&other) noexcept = default; - simdjson_really_inline document &operator=(const document &other) noexcept = delete; - simdjson_really_inline document &operator=(document &&other) noexcept = default; + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; /** * Cast this JSON value to an array. @@ -25123,49 +24404,49 @@ class document { * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_array() & noexcept; /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ - simdjson_really_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to an unsigned integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. 
*/ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. * * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. * * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double. @@ -25173,7 +24454,7 @@ class document { * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. * @@ -25185,7 +24466,7 @@ class document { * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; /** * Cast this JSON value to a raw_json_string. * @@ -25194,28 +24475,31 @@ class document { * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. * * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ - simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; /** * Cast this JSON value to a value when the document is an object or an array. * * @returns A value if a JSON array or object cannot be found. * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). */ - simdjson_really_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result get_value() noexcept; /** - * Checks if this JSON value is null. + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result is_null() noexcept; /** * Get this value as the given type. @@ -25228,13 +24512,13 @@ class document { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ - template simdjson_really_inline simdjson_result get() & noexcept { + template simdjson_inline simdjson_result get() & noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. 
static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); } /** @overload template simdjson_result get() & noexcept */ - template simdjson_really_inline simdjson_result get() && noexcept { + template simdjson_inline simdjson_result get() && noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); @@ -25251,9 +24535,9 @@ class document { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_really_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) & noexcept; /** @overload template error_code get(T &out) & noexcept */ - template simdjson_really_inline error_code get(T &out) && noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -25262,35 +24546,35 @@ class document { * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ - simdjson_really_inline operator array() & noexcept(false); + simdjson_inline operator array() & noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ - simdjson_really_inline operator object() & noexcept(false); + simdjson_inline operator object() & noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. 
*/ - simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ - simdjson_really_inline operator int64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ - simdjson_really_inline operator double() noexcept(false); + simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * @@ -25300,7 +24584,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -25309,21 +24593,21 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator bool() noexcept(false); /** * Cast this JSON value to a value. * * @returns A value value. * @exception if a JSON value cannot be found */ - simdjson_really_inline operator value() noexcept(false); + simdjson_inline operator value() noexcept(false); #endif /** * This method scans the array and counts the number of elements. 
@@ -25336,7 +24620,7 @@ class document { * there is a missing comma), then an error is returned and it is no longer * safe to continue. */ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun @@ -25351,7 +24635,7 @@ class document { * To check that an object is empty, it is more performant to use * the is_empty() method. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. @@ -25359,19 +24643,19 @@ class document { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; /** * Begin array iteration. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -25405,9 +24689,9 @@ class document { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -25439,16 +24723,18 @@ class document { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result 
find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -25456,7 +24742,7 @@ class document { * * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; /** * Checks whether the document is a scalar (string, number, null, Boolean). @@ -25465,14 +24751,14 @@ class document { * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the document is a negative number. * * @returns true if the number if negative. */ - simdjson_really_inline bool is_negative() noexcept; + simdjson_inline bool is_negative() noexcept; /** * Checks whether the document is an integer number. Note that * this requires to partially parse the number string. If @@ -25482,7 +24768,7 @@ class document { * * @returns true if the number if negative. */ - simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. 
It is @@ -25505,7 +24791,7 @@ class document { * * @returns the type of the number */ - simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may @@ -25533,7 +24819,7 @@ class document { * to call "get_int64()" when number.get_number_type() is not * number_type::signed_integer and when number.is_int64() is false. */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** * Get the raw JSON for this token. @@ -25557,7 +24843,7 @@ class document { * - false * - null */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; /** * Reset the iterator inside the document instance so we are pointing back at the @@ -25589,7 +24875,7 @@ class document { * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ - simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -25622,26 +24908,26 @@ class document { * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; /** * Consumes the document and returns a string_view instance corresponding to the * document as represented in JSON. It points inside the original byte array containing * the JSON document. 
*/ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Consumes the document. */ - simdjson_really_inline error_code consume() noexcept; + simdjson_inline error_code consume() noexcept; - simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; - simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; - simdjson_really_inline value_iterator resume_value_iterator() noexcept; - simdjson_really_inline value_iterator get_root_value_iterator() noexcept; - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; - static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; // // Fields @@ -25665,59 +24951,59 @@ class document { */ class document_reference { public: - simdjson_really_inline document_reference() noexcept; - simdjson_really_inline document_reference(document &d) noexcept; - simdjson_really_inline document_reference(const document_reference &other) noexcept = default; - simdjson_really_inline document_reference& operator=(const document_reference &other) noexcept = default; - simdjson_really_inline void rewind() noexcept; - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result 
get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - - simdjson_really_inline bool is_null() noexcept; - simdjson_really_inline simdjson_result raw_json() noexcept; - simdjson_really_inline operator document&() const noexcept; + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator array() & noexcept(false); - simdjson_really_inline operator object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator value() 
noexcept(false); + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - simdjson_really_inline simdjson_result raw_json_token() noexcept; - simdjson_really_inline simdjson_result 
at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; private: document *doc{nullptr}; }; @@ -25730,63 +25016,63 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; - - simdjson_really_inline 
simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_from_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; - - template simdjson_really_inline simdjson_result get() & noexcept; - template simdjson_really_inline simdjson_result get() && noexcept; - - template simdjson_really_inline error_code get(T &out) & noexcept; - template simdjson_really_inline error_code get(T &out) && noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_from_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline 
simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) 
& noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + 
simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; @@ -25799,55 +25085,55 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; - - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + 
simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() 
noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document_reference::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result 
find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; @@ -25866,7 +25152,8 @@ class object; class raw_json_string; /** - * An ephemeral JSON value returned during iteration. + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. */ class value { public: @@ -25875,7 +25162,7 @@ class value { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline value() noexcept = default; + simdjson_inline value() noexcept = default; /** * Get this value as the given type. @@ -25888,7 +25175,7 @@ class value { * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. 
*/ - template simdjson_really_inline simdjson_result get() noexcept { + template simdjson_inline simdjson_result get() noexcept { // Unless the simdjson library provides an inline implementation, calling this method should // immediately fail. static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); @@ -25903,7 +25190,7 @@ class value { * @returns INCORRECT_TYPE If the JSON value is not an object. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ - template simdjson_really_inline error_code get(T &out) noexcept; + template simdjson_inline error_code get(T &out) noexcept; /** * Cast this JSON value to an array. @@ -25911,7 +25198,7 @@ class value { * @returns An object that can be used to iterate the array. * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_array() noexcept; /** * Cast this JSON value to an object. @@ -25919,7 +25206,7 @@ class value { * @returns An object that can be used to look up or iterate fields. * @returns INCORRECT_TYPE If the JSON value is not an object. */ - simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_object() noexcept; /** * Cast this JSON value to an unsigned integer. @@ -25927,7 +25214,7 @@ class value { * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; /** * Cast this JSON value (inside string) to a unsigned integer. @@ -25935,7 +25222,7 @@ class value { * @returns A unsigned 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. 
*/ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; /** * Cast this JSON value to a signed integer. @@ -25943,7 +25230,7 @@ class value { * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; /** * Cast this JSON value (inside string) to a signed integer. @@ -25951,7 +25238,7 @@ class value { * @returns A signed 64-bit integer. * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; /** * Cast this JSON value to a double. @@ -25959,7 +25246,7 @@ class value { * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double() noexcept; /** * Cast this JSON value (inside string) to a double @@ -25967,7 +25254,7 @@ class value { * @returns A double. * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; /** * Cast this JSON value to a string. @@ -25983,7 +25270,7 @@ class value { * time it parses a document or when it is destroyed. * @returns INCORRECT_TYPE if the JSON value is not a string. */ - simdjson_really_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; /** * Cast this JSON value to a raw_json_string. @@ -25993,7 +25280,7 @@ class value { * @returns A pointer to the raw JSON for the given string. * @returns INCORRECT_TYPE if the JSON value is not a string. 
*/ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; /** * Cast this JSON value to a bool. @@ -26001,14 +25288,17 @@ class value { * @returns A bool value. * @returns INCORRECT_TYPE if the JSON value is not true or false. */ - simdjson_really_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; /** - * Checks if this JSON value is null. + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. * * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. */ - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result is_null() noexcept; #if SIMDJSON_EXCEPTIONS /** @@ -26017,35 +25307,35 @@ class value { * @returns An object that can be used to iterate the array. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. */ - simdjson_really_inline operator array() noexcept(false); + simdjson_inline operator array() noexcept(false); /** * Cast this JSON value to an object. * * @returns An object that can be used to look up or iterate fields. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. */ - simdjson_really_inline operator object() noexcept(false); + simdjson_inline operator object() noexcept(false); /** * Cast this JSON value to an unsigned integer. * * @returns A signed 64-bit integer. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. */ - simdjson_really_inline operator uint64_t() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); /** * Cast this JSON value to a signed integer. * * @returns A signed 64-bit integer. 
* @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. */ - simdjson_really_inline operator int64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); /** * Cast this JSON value to a double. * * @returns A double. * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. */ - simdjson_really_inline operator double() noexcept(false); + simdjson_inline operator double() noexcept(false); /** * Cast this JSON value to a string. * @@ -26057,7 +25347,7 @@ class value { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); /** * Cast this JSON value to a raw_json_string. * @@ -26066,14 +25356,14 @@ class value { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_really_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); /** * Cast this JSON value to a bool. * * @returns A bool value. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. */ - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator bool() noexcept(false); #endif /** @@ -26083,13 +25373,13 @@ class value { * * @returns INCORRECT_TYPE If the JSON value is not an array. */ - simdjson_really_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result begin() & noexcept; /** * Sentinel representing the end of the array. * * Part of the std::iterable interface. */ - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * This method scans the array and counts the number of elements. 
* The count_elements method should always be called before you have begun @@ -26100,8 +25390,11 @@ class value { * beginning as if it had never been accessed. If the JSON is malformed (e.g., * there is a missing comma), then an error is returned and it is no longer * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. */ - simdjson_really_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; /** * This method scans the object and counts the number of key-value pairs. * The count_fields method should always be called before you have begun @@ -26115,8 +25408,11 @@ class value { * * To check that an object is empty, it is more performant to use * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Get the value at the given index in the array. This function has linear-time complexity. * This function should only be called once on an array instance since the array iterator is not reset between each call. @@ -26124,7 +25420,7 @@ class value { * @return The value at the given index, or: * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; /** * Look up a field by name on an object (order-sensitive). * @@ -26147,9 +25443,9 @@ class value { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -26173,16 +25469,18 @@ class value { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + 
simdjson_inline simdjson_result operator[](const char *key) noexcept; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -26192,7 +25490,7 @@ class value { * json_type::number, json_type::boolean, or json_type::null). * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result type() noexcept; /** * Checks whether the value is a scalar (string, number, null, Boolean). @@ -26201,14 +25499,14 @@ class value { * @returns true if the type is string, number, null, Boolean * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". */ - simdjson_really_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; /** * Checks whether the value is a negative number. * * @returns true if the number if negative. */ - simdjson_really_inline bool is_negative() noexcept; + simdjson_inline bool is_negative() noexcept; /** * Checks whether the value is an integer number. Note that * this requires to partially parse the number string. If @@ -26222,7 +25520,7 @@ class value { * * @returns true if the number if negative. */ - simdjson_really_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; /** * Determine the number type (integer or floating-point number) as quickly * as possible. This function does not fully validate the input. 
It is @@ -26245,7 +25543,7 @@ class value { * * @returns the type of the number */ - simdjson_really_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; /** * Attempt to parse an ondemand::number. An ondemand::number may @@ -26277,7 +25575,7 @@ class value { * calling 'get_number()', you scan the number string only once, determining * efficiently the type and storing it in an efficient manner. */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; /** @@ -26303,12 +25601,12 @@ class value { * - false * - null */ - simdjson_really_inline std::string_view raw_json_token() noexcept; + simdjson_inline std::string_view raw_json_token() noexcept; /** * Returns the current location in the document if in bounds. */ - simdjson_really_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_location() noexcept; /** * Returns the current depth in the document if in bounds. @@ -26319,7 +25617,7 @@ class value { * 2 = , or } inside root array/object * 3 = key or value inside root array/object. */ - simdjson_really_inline int32_t current_depth() const noexcept; + simdjson_inline int32_t current_depth() const noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -26363,38 +25661,38 @@ class value { * - INCORRECT_TYPE if a non-integer is used to access an array * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; protected: /** * Create a value. */ - simdjson_really_inline value(const value_iterator &iter) noexcept; + simdjson_inline value(const value_iterator &iter) noexcept; /** * Skip this value, allowing iteration to continue. 
*/ - simdjson_really_inline void skip() noexcept; + simdjson_inline void skip() noexcept; /** * Start a value at the current position. * * (It should already be started; this is just a self-documentation method.) */ - static simdjson_really_inline value start(const value_iterator &iter) noexcept; + static simdjson_inline value start(const value_iterator &iter) noexcept; /** * Resume a value. */ - static simdjson_really_inline value resume(const value_iterator &iter) noexcept; + static simdjson_inline value resume(const value_iterator &iter) noexcept; /** * Get the object, starting or resuming it as necessary */ - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; - // simdjson_really_inline void log_value(const char *type) const noexcept; - // simdjson_really_inline void log_error(const char *message) const noexcept; + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; value_iterator iter{}; @@ -26415,43 +25713,43 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result get_array() noexcept; - simdjson_really_inline simdjson_result get_object() noexcept; + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result 
get_object() noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; - template simdjson_really_inline simdjson_result get() noexcept; + template simdjson_inline simdjson_result get() noexcept; - template simdjson_really_inline error_code get(T &out) noexcept; + template simdjson_inline error_code get(T &out) noexcept; #if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator 
std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); #endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; /** * Look up a field by name on an object (order-sensitive). @@ -26473,9 +25771,9 @@ struct simdjson_result : publi * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -26496,13 +25794,13 @@ struct simdjson_result : publi * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ 
+ simdjson_inline simdjson_result operator[](const char *key) noexcept; /** * Get the type of this JSON value. @@ -26511,21 +25809,21 @@ struct simdjson_result : publi * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). */ - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - - /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - /** @copydoc simdjson_really_inline simdjson_result current_location() noexcept */ - simdjson_really_inline simdjson_result current_location() noexcept; - /** @copydoc simdjson_really_inline int32_t current_depth() const noexcept */ - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() 
const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; }; } // namespace simdjson @@ -26550,7 +25848,7 @@ class field : public std::pair { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline field() noexcept; + simdjson_inline field() noexcept; /** * Get the key as a string_view (for higher speed, consider raw_key). @@ -26560,25 +25858,25 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with * an unescaped C string: e.g., key() == "test". */ - simdjson_really_inline raw_json_string key() const noexcept; + simdjson_inline raw_json_string key() const noexcept; /** * Get the field value. 
*/ - simdjson_really_inline ondemand::value &value() & noexcept; + simdjson_inline ondemand::value &value() & noexcept; /** * @overload ondemand::value &ondemand::value() & noexcept */ - simdjson_really_inline ondemand::value value() && noexcept; + simdjson_inline ondemand::value value() && noexcept; protected: - simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept; - static simdjson_really_inline simdjson_result start(value_iterator &parent_iter) noexcept; - static simdjson_really_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; friend struct simdjson_result; friend class object_iterator; }; @@ -26592,13 +25890,13 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; - simdjson_really_inline simdjson_result unescaped_key() noexcept; - simdjson_really_inline simdjson_result key() noexcept; - simdjson_really_inline simdjson_result value() noexcept; + simdjson_inline simdjson_result unescaped_key() noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; }; } // 
namespace simdjson @@ -26619,10 +25917,10 @@ class object { * * Exists so you can declare a variable and later assign to it before use. */ - simdjson_really_inline object() noexcept = default; + simdjson_inline object() noexcept = default; - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; /** * Look up a field by name on an object (order-sensitive). * @@ -26643,7 +25941,8 @@ class object { * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. * * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an @@ -26656,9 +25955,9 @@ class object { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; /** * Look up a field by name on an object, without regard to key order. @@ -26680,7 +25979,8 @@ class object { * that only one field is returned. * * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array * given by content["bids"].get_array() should not be accessed after you have called * content["asks"].get_array(). You can detect such mistakes by first compiling and running * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an @@ -26692,13 +25992,13 @@ class object { * @param key The key to look up. * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
*/ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; /** * Get the value associated with the given JSON pointer. We use the RFC 6901 @@ -26769,27 +26069,30 @@ class object { * * To check that an object is empty, it is more performant to use * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. */ - simdjson_really_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; /** * Consumes the object and returns a string_view instance corresponding to the * object as represented in JSON. 
It points inside the original byte array containing * the JSON document. */ - simdjson_really_inline simdjson_result raw_json() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; protected: /** * Go to the end of the object, no matter where you are right now. */ - simdjson_really_inline error_code consume() noexcept; - static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; - static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; - static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; - static simdjson_really_inline object resume(const value_iterator &iter) noexcept; - simdjson_really_inline object(const value_iterator &iter) noexcept; + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code find_field_raw(const std::string_view key) noexcept; + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; value_iterator iter{}; @@ -26807,19 +26110,19 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - 
simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; @@ -26871,9 +26174,9 @@ class parser { inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; inline parser(parser &&other) noexcept = default; - simdjson_really_inline parser(const parser &other) = delete; - simdjson_really_inline parser &operator=(const parser &other) = 
delete; - simdjson_really_inline parser &operator=(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; /** Deallocate the JSON parser. */ inline ~parser() noexcept = default; @@ -27056,17 +26359,26 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_really_inline size_t capacity() const noexcept; + simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_really_inline size_t max_capacity() const noexcept; - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; - /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ - simdjson_really_inline size_t max_depth() const noexcept; + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length * and `max_depth` depth. * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * * @param capacity The new capacity. 
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. * @return The error, if there is one. @@ -27082,6 +26394,27 @@ class parser { bool threaded{true}; #endif + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
+ */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst) const noexcept; private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -27089,7 +26422,7 @@ class parser { size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -27106,9 +26439,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -27196,13 +26529,13 @@ class document_stream { * auto error = parser.iterate_many(json).get(docs); * ``` */ - simdjson_really_inline document_stream() noexcept; + simdjson_inline document_stream() noexcept; /** Move one document_stream to another. */ - simdjson_really_inline document_stream(document_stream &&other) noexcept = default; + simdjson_inline document_stream(document_stream &&other) noexcept = default; /** Move one document_stream to another. */ - simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; - simdjson_really_inline ~document_stream() noexcept; + simdjson_inline ~document_stream() noexcept; /** * Returns the input size in bytes. 
@@ -27241,11 +26574,11 @@ class document_stream { /** * Default constructor. */ - simdjson_really_inline iterator() noexcept; + simdjson_inline iterator() noexcept; /** * Get the current document (or error). */ - simdjson_really_inline simdjson_result operator*() noexcept; + simdjson_inline simdjson_result operator*() noexcept; /** * Advance to the next document (prefix). */ @@ -27254,7 +26587,7 @@ class document_stream { * Check if we're at the end yet. * @param other the end iterator to compare to. */ - simdjson_really_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator!=(const iterator &other) const noexcept; /** * @private * @@ -27270,7 +26603,7 @@ class document_stream { * may change in future versions of simdjson: we find the API somewhat * awkward and we would like to offer something friendlier. */ - simdjson_really_inline size_t current_index() const noexcept; + simdjson_inline size_t current_index() const noexcept; /** * @private @@ -27291,7 +26624,7 @@ class document_stream { * awkward and we would like to offer something friendlier. * */ - simdjson_really_inline std::string_view source() const noexcept; + simdjson_inline std::string_view source() const noexcept; /** * Returns error of the stream (if any). @@ -27299,7 +26632,7 @@ class document_stream { inline error_code error() const noexcept; private: - simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; + simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ document_stream* stream; /** Whether we're finished or not. */ @@ -27313,11 +26646,11 @@ class document_stream { /** * Start iterating the documents in the stream. */ - simdjson_really_inline iterator begin() noexcept; + simdjson_inline iterator begin() noexcept; /** * The end of the stream, for iterator comparison purposes. 
*/ - simdjson_really_inline iterator end() noexcept; + simdjson_inline iterator end() noexcept; private: @@ -27333,7 +26666,7 @@ class document_stream { * @param len is the length of the raw byte buffer in bytes * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) */ - simdjson_really_inline document_stream( + simdjson_inline document_stream( ondemand::parser &parser, const uint8_t *buf, size_t len, @@ -27437,9 +26770,9 @@ namespace simdjson { template<> struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; }; } // namespace simdjson @@ -27449,23 +26782,27 @@ struct simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; /** * Create a string-view instance out of a value instance. The string-view instance * contains JSON text that is suitable to be parsed as JSON again. The value must - * not have been accessed previously. + * not have been accessed previously. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; /** * Create a string-view instance out of an object instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. 
*/ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; /** * Create a string-view instance out of an array instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. */ inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; inline simdjson_result to_json_string(simdjson_result x); @@ -27484,7 +26821,8 @@ inline simdjson_result to_json_string(simdjson_result x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The array. @@ -27506,7 +26845,8 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IM inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The array. @@ -27521,7 +26861,8 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IM inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); #endif /** - * Print JSON to an output stream. + * Print JSON to an output stream. It does not + * validate the content. * * @param out The output stream. * @param value The object. 
@@ -27545,7 +26886,7 @@ namespace SIMDJSON_BUILTIN_IMPLEMENTATION { // template -simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { error = this->second; if (!error) { value = std::forward>(*this).first; @@ -27553,66 +26894,66 @@ simdjson_really_inline void implementation_simdjson_result_base::tie(T &value } template -simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { error_code error; std::forward>(*this).tie(value, error); return error; } template -simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { return this->second; } #if SIMDJSON_EXCEPTIONS template -simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return this->first; } template -simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { return std::forward>(*this).take_value(); } template -simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { if (error()) { throw simdjson_error(error()); } return std::forward(this->first); } template -simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { +simdjson_inline implementation_simdjson_result_base::operator T&&() && 
noexcept(false) { return std::forward>(*this).take_value(); } #endif // SIMDJSON_EXCEPTIONS template -simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { return this->first; } template -simdjson_really_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { return this->first; } template -simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { return std::forward(this->first); } template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept : first{std::forward(value)}, second{error} {} template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept : implementation_simdjson_result_base(T{}, error) {} template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION @@ -27654,48 +26995,48 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result &t -simdjson_really_inline number_type number::get_number_type() const noexcept { +simdjson_inline number_type number::get_number_type() const noexcept { return type; } 
-simdjson_really_inline bool number::is_uint64() const noexcept { +simdjson_inline bool number::is_uint64() const noexcept { return get_number_type() == number_type::unsigned_integer; } -simdjson_really_inline uint64_t number::get_uint64() const noexcept { +simdjson_inline uint64_t number::get_uint64() const noexcept { return payload.unsigned_integer; } -simdjson_really_inline number::operator uint64_t() const noexcept { +simdjson_inline number::operator uint64_t() const noexcept { return get_uint64(); } -simdjson_really_inline bool number::is_int64() const noexcept { +simdjson_inline bool number::is_int64() const noexcept { return get_number_type() == number_type::signed_integer; } -simdjson_really_inline int64_t number::get_int64() const noexcept { +simdjson_inline int64_t number::get_int64() const noexcept { return payload.signed_integer; } -simdjson_really_inline number::operator int64_t() const noexcept { +simdjson_inline number::operator int64_t() const noexcept { return get_int64(); } -simdjson_really_inline bool number::is_double() const noexcept { +simdjson_inline bool number::is_double() const noexcept { return get_number_type() == number_type::floating_point_number; } -simdjson_really_inline double number::get_double() const noexcept { +simdjson_inline double number::get_double() const noexcept { return payload.floating_point_number; } -simdjson_really_inline number::operator double() const noexcept { +simdjson_inline number::operator double() const noexcept { return get_double(); } -simdjson_really_inline double number::as_double() const noexcept { +simdjson_inline double number::as_double() const noexcept { if(is_double()) { return payload.floating_point_number; } @@ -27705,22 +27046,22 @@ simdjson_really_inline double number::as_double() const noexcept { return double(payload.unsigned_integer); } -simdjson_really_inline void number::append_s64(int64_t value) noexcept { +simdjson_inline void number::append_s64(int64_t value) noexcept { 
payload.signed_integer = value; type = number_type::signed_integer; } -simdjson_really_inline void number::append_u64(uint64_t value) noexcept { +simdjson_inline void number::append_u64(uint64_t value) noexcept { payload.unsigned_integer = value; type = number_type::unsigned_integer; } -simdjson_really_inline void number::append_double(double value) noexcept { +simdjson_inline void number::append_double(double value) noexcept { payload.floating_point_number = value; type = number_type::floating_point_number; } -simdjson_really_inline void number::skip_double() noexcept { +simdjson_inline void number::skip_double() noexcept { type = number_type::floating_point_number; } @@ -27730,9 +27071,9 @@ simdjson_really_inline void number::skip_double() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -27892,7 +27233,7 @@ inline void log_line(const json_iterator &iter, token_position index, depth_t de printf(" "); } // printf("| %5u ", *(index+1)); - printf("| %5u ", depth); + printf("| %5i ", depth); printf("| %.*s ", int(detail.size()), detail.data()); printf("|\n"); fflush(stdout); @@ -27910,18 +27251,12 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_really_inline const char * raw_json_string::raw() const 
noexcept { return reinterpret_cast(buf); } -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { - uint8_t *end = stringparsing::parse_string(buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; -} +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;pos < target.size() && target[pos] != '\\';pos++) {} @@ -27939,7 +27274,7 @@ simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::s return true; } -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { size_t pos{0}; // if the content has no escape character, just scan through it quickly! for(;target[pos] && target[pos] != '\\';pos++) {} @@ -27958,12 +27293,12 @@ simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const } -simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { // If we are going to call memcmp, then we must know something about the length of the raw_json_string. 
return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); } -simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. if(target.size() <= SIMDJSON_PADDING) { @@ -27978,7 +27313,7 @@ simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view ta return true; } -simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { const char * r{raw()}; size_t pos{0}; bool escaping{false}; @@ -28002,7 +27337,7 @@ simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) c } -simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and // the raw content is quote terminated within a valid JSON string. const char * r{raw()}; @@ -28014,7 +27349,7 @@ simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target return true; } -simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { // Assumptions: does not contain unescaped quote characters, and // the raw content is quote terminated within a valid JSON string. 
const char * r{raw()}; @@ -28039,29 +27374,29 @@ simdjson_really_inline bool raw_json_string::is_equal(const char* target) const return true; } -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { return a.unsafe_is_equal(c); } -simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { return a == c; } -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { return !(a == c); } -simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { return !(a == c); } -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { - return unescape(iter.string_buf_loc()); +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { + return iter.unescape(*this); } -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { bool in_escape = false; const char *s = str.raw(); while (true) { @@ -28081,20 +27416,16 @@ simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &ou namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept 
+simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} -simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { if (error()) { return error(); } return first.raw(); } -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(uint8_t *&dst) const noexcept { - if (error()) { return error(); } - return first.unescape(dst); -} -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { if (error()) { return error(); } return first.unescape(iter); } @@ -28106,65 +27437,65 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline token_iterator::token_iterator( +simdjson_inline token_iterator::token_iterator( const uint8_t *_buf, token_position position ) noexcept : buf{_buf}, _position{position} { } -simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { return *(_position); } -simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { return &buf[*(_position++)]; } -simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { 
+simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { return &buf[*position]; } -simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { return *position; } -simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { return *(position+1) - *position; } -simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { return &buf[*(_position+delta)]; } -simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { return *(_position+delta); } -simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { return *(_position+delta+1) - *(_position+delta); } -simdjson_really_inline token_position token_iterator::position() const noexcept { +simdjson_inline token_position token_iterator::position() const noexcept { return _position; } -simdjson_really_inline void token_iterator::set_position(token_position target_position) noexcept { +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { _position = target_position; } -simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { return _position == other._position; } -simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { +simdjson_inline bool 
token_iterator::operator!=(const token_iterator &other) const noexcept { return _position != other._position; } -simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { return _position > other._position; } -simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { return _position >= other._position; } -simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { return _position < other._position; } -simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { return _position <= other._position; } @@ -28174,9 +27505,9 @@ simdjson_really_inline bool token_iterator::operator<=(const token_iterator &oth namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -28186,7 +27517,7 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept +simdjson_inline json_iterator::json_iterator(json_iterator &&other) 
noexcept : token(std::forward(other.token)), parser{other.parser}, _string_buf_loc{other._string_buf_loc}, @@ -28197,7 +27528,7 @@ simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexc { other.parser = nullptr; } -simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { token = other.token; parser = other.parser; _string_buf_loc = other._string_buf_loc; @@ -28209,7 +27540,7 @@ simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&o return *this; } -simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept : token(buf, &_parser->implementation->structural_indexes[0]), parser{_parser}, _string_buf_loc{parser->string_buf.get()}, @@ -28231,12 +27562,33 @@ inline void json_iterator::rewind() noexcept { _depth = 1; } +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + // GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller // relating depth and parent_depth, which is a desired effect. The warning does not show up if the // skip_child() function is not marked inline). 
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { if (depth() <= parent_depth) { return SUCCESS; } switch (*return_current_and_advance()) { // TODO consider whether matching braces is a requirement: if non-matching braces indicates @@ -28313,23 +27665,27 @@ simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child SIMDJSON_POP_DISABLE_WARNINGS -simdjson_really_inline bool json_iterator::at_root() const noexcept { +simdjson_inline bool json_iterator::at_root() const noexcept { return position() == root_position(); } -simdjson_really_inline bool json_iterator::streaming() const noexcept { +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { return _streaming; } -simdjson_really_inline token_position json_iterator::root_position() const noexcept { +simdjson_inline token_position json_iterator::root_position() const noexcept { return _root; } -simdjson_really_inline void json_iterator::assert_at_document_depth() const noexcept { +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); } -simdjson_really_inline void json_iterator::assert_at_root() const noexcept { +simdjson_inline void json_iterator::assert_at_root() const noexcept { SIMDJSON_ASSUME( _depth == 1 ); #ifndef SIMDJSON_CLANG_VISUAL_STUDIO // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument @@ -28338,21 +27694,21 @@ simdjson_really_inline void json_iterator::assert_at_root() const noexcept { #endif } -simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { +simdjson_inline 
void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { assert_valid_position(token._position + required_tokens - 1); } -simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); #endif } -simdjson_really_inline bool json_iterator::at_end() const noexcept { +simdjson_inline bool json_iterator::at_end() const noexcept { return position() == end_position(); } -simdjson_really_inline token_position json_iterator::end_position() const noexcept { +simdjson_inline token_position json_iterator::end_position() const noexcept { uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; return &parser->implementation->structural_indexes[n_structural_indexes]; } @@ -28381,42 +27737,42 @@ inline simdjson_result json_iterator::current_location() noexcept return reinterpret_cast(token.peek()); } -simdjson_really_inline bool json_iterator::is_alive() const noexcept { +simdjson_inline bool json_iterator::is_alive() const noexcept { return parser; } -simdjson_really_inline void json_iterator::abandon() noexcept { +simdjson_inline void json_iterator::abandon() noexcept { parser = nullptr; _depth = 0; } -simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(); #endif // SIMDJSON_CHECK_EOF return token.return_current_and_advance(); } -simdjson_really_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept 
{ // deliberately done without safety guard: return token.peek(0); } -simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // SIMDJSON_CHECK_EOF return token.peek(delta); } -simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { #if SIMDJSON_CHECK_EOF assert_more_tokens(delta+1); #endif // #if SIMDJSON_CHECK_EOF return token.peek_length(delta); } -simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { // todo: currently we require end-of-string buffering, but the following // assert_valid_position should be turned on if/when we lift that condition. // assert_valid_position(position); @@ -28425,14 +27781,14 @@ simdjson_really_inline const uint8_t *json_iterator::peek(token_position positio return token.peek(position); } -simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { #if SIMDJSON_CHECK_EOF assert_valid_position(position); #endif // SIMDJSON_CHECK_EOF return token.peek_length(position); } -simdjson_really_inline token_position json_iterator::last_position() const noexcept { +simdjson_inline token_position json_iterator::last_position() const noexcept { // The following line fails under some compilers... // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); // since it has side-effects. 
@@ -28440,46 +27796,51 @@ simdjson_really_inline token_position json_iterator::last_position() const noexc SIMDJSON_ASSUME(n_structural_indexes > 0); return &parser->implementation->structural_indexes[n_structural_indexes - 1]; } -simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { return token.peek(last_position()); } -simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); SIMDJSON_ASSUME(_depth == parent_depth + 1); _depth = parent_depth; } -simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); _depth = child_depth; } -simdjson_really_inline depth_t json_iterator::depth() const noexcept { +simdjson_inline depth_t json_iterator::depth() const noexcept { return _depth; } -simdjson_really_inline uint8_t *&json_iterator::string_buf_loc() noexcept { +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { return _string_buf_loc; } -simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); logger::log_error(*this, message); error = _error; return error; } -simdjson_really_inline token_position json_iterator::position() const noexcept { +simdjson_inline token_position json_iterator::position() const noexcept { return token.position(); } -simdjson_really_inline void 
json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in) noexcept { + return parser->unescape(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); SIMDJSON_ASSUME(_depth == child_depth - 1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS #ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); #endif #endif @@ -28487,27 +27848,29 @@ simdjson_really_inline void json_iterator::reenter_child(token_position position _depth = child_depth; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS -simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const noexcept { - return parser->start_positions[depth]; +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; } -simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - parser->start_positions[depth] = position; +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } } #endif -simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); logger::log_error(*this, message); return _error; } template -simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { // Let us guard against silly cases: if((N < max_len) || (N == 0)) { return false; } // Truncate whitespace to fit the buffer. 
@@ -28528,9 +27891,9 @@ simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(c namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -28540,7 +27903,7 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline value_iterator::value_iterator( +simdjson_inline value_iterator::value_iterator( json_iterator *json_iter, depth_t depth, token_position start_position @@ -28548,19 +27911,19 @@ simdjson_really_inline value_iterator::value_iterator( { } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_object() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_object(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { SIMDJSON_TRY( start_container('{', "Not an object", "object") ); return started_root_object(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { assert_at_container_start(); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, 
start_position()); #endif if (*_json_iter->peek() == '}') { @@ -28572,19 +27935,32 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator return true; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. - if (! _json_iter->streaming() && (*_json_iter->peek_last() != '}')) { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + if ( ! _json_iter->streaming() ) { + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } } return started_object(); } -simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { #if SIMDJSON_CHECK_EOF if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } @@ -28593,7 +27969,7 @@ simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_conta return SUCCESS; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { assert_at_next(); // It's illegal to call this unless there are more tokens: anything that ends in } or ] is @@ -28610,7 +27986,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator } } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { error_code error; bool has_value; // @@ -28636,7 +28012,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // ``` // } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. 
@@ -28661,7 +28037,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator } else { if ((error = skip_child() )) { abandon(); return error; } if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } @@ -28705,7 +28081,9 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator return false; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { /** * When find_field_unordered_raw is called, we can either be pointing at the * first key, pointing outside (at the closing brace) or if a key was matched @@ -28746,7 +28124,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS // If we're past the end of the object, we're being iterated out of order. // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, // this object iterator will blithely scan that object for fields. 
@@ -28775,7 +28153,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator if ((error = skip_child() )) { abandon(); return error; } search_start = _json_iter->position(); if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } #endif } @@ -28892,8 +28270,9 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator // never reach this point. return false; } +SIMDJSON_POP_DISABLE_WARNINGS -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { assert_at_next(); const uint8_t *key = _json_iter->return_current_and_advance(); @@ -28901,7 +28280,7 @@ simdjson_warn_unused simdjson_really_inline simdjson_result val return raw_json_string(key); } -simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { assert_at_next(); if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } @@ -28909,12 +28288,12 @@ simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_val return SUCCESS; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { SIMDJSON_TRY( start_container('[', "Not an array", "array") ); return started_array(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { SIMDJSON_TRY( start_container('[', "Not 
an array", "array") ); return started_root_array(); } @@ -28926,7 +28305,7 @@ inline std::string value_iterator::to_string() const noexcept { return answer; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { assert_at_container_start(); if (*_json_iter->peek() == ']') { logger::log_value(*_json_iter, "empty array"); @@ -28935,25 +28314,38 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator return false; } _json_iter->descend_to(depth()+1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS _json_iter->set_start_position(_depth, start_position()); #endif return true; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { // When in streaming mode, we cannot expect peek_last() to be the last structural element of the // current document. It only works in the normal mode where we have indexed a single document. // Note that adding a check for 'streaming' is not expensive since we only have at most // one root element. - if ( ! _json_iter->streaming() && (*_json_iter->peek_last() != ']')) { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + if ( ! _json_iter->streaming() ) { + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. 
+ // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } } return started_array(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { assert_at_next(); logger::log_event(*this, "has_next_element"); @@ -28970,96 +28362,106 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator } } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { auto not_true = atomparsing::str4ncmp(json, "true"); auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); if (error) { return incorrect_type_error("Not a boolean"); } return simdjson_result(!not_true); } -simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { - return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { - return get_raw_json_string().unescape(_json_iter->string_buf_loc()); +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string() noexcept { + return get_raw_json_string().unescape(json_iter()); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { auto json = peek_scalar("string"); if (*json != '"') { return incorrect_type_error("Not a string"); } advance_scalar("string"); return raw_json_string(json+1); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { auto result = 
numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { auto result = numberparsing::parse_double(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { auto result = parse_bool(peek_non_root_scalar("bool")); if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } return result; } -simdjson_really_inline bool 
value_iterator::is_null() noexcept { - auto result = parse_null(peek_non_root_scalar("null")); - if(result) { advance_non_root_scalar("null"); } - return result; +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; } -simdjson_really_inline bool value_iterator::is_negative() noexcept { +simdjson_inline bool value_iterator::is_negative() noexcept { return numberparsing::is_negative(peek_non_root_scalar("numbersign")); } -simdjson_really_inline bool value_iterator::is_root_negative() noexcept { +simdjson_inline bool value_iterator::is_root_negative() noexcept { return numberparsing::is_negative(peek_root_scalar("numbersign")); } -simdjson_really_inline simdjson_result value_iterator::is_integer() noexcept { +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { return numberparsing::is_integer(peek_non_root_scalar("integer")); } -simdjson_really_inline simdjson_result value_iterator::get_number_type() noexcept { +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { return numberparsing::get_number_type(peek_non_root_scalar("integer")); } -simdjson_really_inline simdjson_result value_iterator::get_number() noexcept { +simdjson_inline simdjson_result value_iterator::get_number() noexcept { number num; error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); if(error) { return error; } return num; } -simdjson_really_inline simdjson_result value_iterator::is_root_integer() noexcept { +simdjson_inline simdjson_result value_iterator::is_root_integer() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("is_root_integer"); uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return false; // if there are 
more than 20 characters, it cannot be represented as an integer. } - return numberparsing::is_integer(tmpbuf); + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if((answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; } -simdjson_really_inline simdjson_result value_iterator::get_root_number_type() noexcept { +simdjson_inline simdjson_result value_iterator::get_root_number_type() noexcept { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -29070,9 +28472,14 @@ simdjson_really_inline simdjson_resultis_single_token())) { return TRAILING_CONTENT; } + return answer; } -simdjson_really_inline simdjson_result value_iterator::get_root_number() noexcept { +simdjson_inline simdjson_result value_iterator::get_root_number() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("number"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -29086,17 +28493,18 @@ simdjson_really_inline simdjson_result value_iterator::get_root_number() number num; error_code error = numberparsing::parse_number(tmpbuf, num); if(error) { return error; } + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } advance_root_scalar("number"); return num; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string() noexcept { return get_string(); } -simdjson_warn_unused simdjson_really_inline simdjson_result 
value_iterator::get_root_raw_json_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { return get_raw_json_string(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer @@ -29105,10 +28513,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iter return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("uint64"); uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer @@ -29117,10 +28528,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iter return NUMBER_ERROR; } auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64() noexcept { auto max_len = peek_start_length(); auto json = 
peek_root_scalar("int64"); uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer @@ -29130,10 +28544,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_itera } auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("int64"); uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer @@ -29143,10 +28560,13 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_itera } auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("int64"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -29158,11 +28578,14 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterat return NUMBER_ERROR; } auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("double"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } return result; 
} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("double"); // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, @@ -29174,19 +28597,27 @@ simdjson_warn_unused simdjson_really_inline simdjson_result value_iterat return NUMBER_ERROR; } auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("double"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } return result; } -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool() noexcept { auto max_len = peek_start_length(); auto json = peek_root_scalar("bool"); uint8_t tmpbuf[5+1]; if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("bool"); } + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } return result; } -simdjson_really_inline bool value_iterator::is_root_null() noexcept { +simdjson_inline bool value_iterator::is_root_null() noexcept { + // If there is trailing content, then the document is not null. 
+ if (!_json_iter->is_single_token()) { return false; } auto max_len = peek_start_length(); auto json = peek_root_scalar("null"); bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && @@ -29195,14 +28626,14 @@ simdjson_really_inline bool value_iterator::is_root_null() noexcept { return result; } -simdjson_warn_unused simdjson_really_inline error_code value_iterator::skip_child() noexcept { +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); return _json_iter->skip_child(depth()); } -simdjson_really_inline value_iterator value_iterator::child() const noexcept { +simdjson_inline value_iterator value_iterator::child() const noexcept { assert_at_child(); return { _json_iter, depth()+1, _json_iter->token.position() }; } @@ -29212,52 +28643,52 @@ simdjson_really_inline value_iterator value_iterator::child() const noexcept { // marked non-inline. 
SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_really_inline bool value_iterator::is_open() const noexcept { +simdjson_inline bool value_iterator::is_open() const noexcept { return _json_iter->depth() >= depth(); } SIMDJSON_POP_DISABLE_WARNINGS -simdjson_really_inline bool value_iterator::at_end() const noexcept { +simdjson_inline bool value_iterator::at_end() const noexcept { return _json_iter->at_end(); } -simdjson_really_inline bool value_iterator::at_start() const noexcept { +simdjson_inline bool value_iterator::at_start() const noexcept { return _json_iter->token.position() == start_position(); } -simdjson_really_inline bool value_iterator::at_first_field() const noexcept { +simdjson_inline bool value_iterator::at_first_field() const noexcept { SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); return _json_iter->token.position() == start_position() + 1; } -simdjson_really_inline void value_iterator::abandon() noexcept { +simdjson_inline void value_iterator::abandon() noexcept { _json_iter->abandon(); } -simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { return _depth; } -simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { return _json_iter->error; } -simdjson_warn_unused simdjson_really_inline uint8_t *&value_iterator::string_buf_loc() noexcept { +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { return _json_iter->string_buf_loc(); } -simdjson_warn_unused simdjson_really_inline const json_iterator &value_iterator::json_iter() const noexcept { +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { return *_json_iter; } -simdjson_warn_unused simdjson_really_inline 
json_iterator &value_iterator::json_iter() noexcept { +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { return *_json_iter; } -simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { return _json_iter->peek(start_position()); } -simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { return _json_iter->peek_length(start_position()); } -simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. if (!is_at_start()) { return peek_start(); } @@ -29267,7 +28698,7 @@ simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *ty return _json_iter->peek(); } -simdjson_really_inline void value_iterator::advance_scalar(const char *type) noexcept { +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. 
if (!is_at_start()) { return; } @@ -29278,12 +28709,12 @@ simdjson_really_inline void value_iterator::advance_scalar(const char *type) noe _json_iter->ascend_to(depth()-1); } -simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { logger::log_start_value(*_json_iter, start_position(), depth(), type); // If we're not at the position anymore, we don't want to advance the cursor. const uint8_t *json; if (!is_at_start()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif json = peek_start(); @@ -29305,14 +28736,14 @@ simdjson_really_inline error_code value_iterator::start_container(uint8_t start_ } -simdjson_really_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } assert_at_root(); return _json_iter->peek(); } -simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return peek_start(); } @@ -29320,7 +28751,7 @@ simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const return _json_iter->peek(); } -simdjson_really_inline void value_iterator::advance_root_scalar(const char *type) noexcept { +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) 
{ return; } @@ -29328,7 +28759,7 @@ simdjson_really_inline void value_iterator::advance_root_scalar(const char *type _json_iter->return_current_and_advance(); _json_iter->ascend_to(depth()-1); } -simdjson_really_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { logger::log_value(*_json_iter, start_position(), depth(), type); if (!is_at_start()) { return; } @@ -29337,23 +28768,23 @@ simdjson_really_inline void value_iterator::advance_non_root_scalar(const char * _json_iter->ascend_to(depth()-1); } -simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { logger::log_error(*_json_iter, start_position(), depth(), message); return INCORRECT_TYPE; } -simdjson_really_inline bool value_iterator::is_at_start() const noexcept { +simdjson_inline bool value_iterator::is_at_start() const noexcept { return position() == start_position(); } -simdjson_really_inline bool value_iterator::is_at_key() const noexcept { +simdjson_inline bool value_iterator::is_at_key() const noexcept { // Keys are at the same depth as the object. // Note here that we could be safer and check that we are within an object, // but we do not. return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; } -simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
auto delta = position() - start_position(); return delta == 1 || delta == 2; @@ -29377,22 +28808,22 @@ inline void value_iterator::assert_at_next() const noexcept { SIMDJSON_ASSUME( _depth > 0 ); } -simdjson_really_inline void value_iterator::move_at_start() noexcept { +simdjson_inline void value_iterator::move_at_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position); } -simdjson_really_inline void value_iterator::move_at_container_start() noexcept { +simdjson_inline void value_iterator::move_at_container_start() noexcept { _json_iter->_depth = _depth; _json_iter->token.set_position(_start_position + 1); } -simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { move_at_container_start(); return started_array(); } -simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { move_at_container_start(); return started_object(); } @@ -29417,11 +28848,11 @@ inline void value_iterator::assert_is_valid() const noexcept { SIMDJSON_ASSUME( _json_iter != nullptr ); } -simdjson_really_inline bool value_iterator::is_valid() const noexcept { +simdjson_inline bool value_iterator::is_valid() const noexcept { return _json_iter != nullptr; } -simdjson_really_inline simdjson_result value_iterator::type() const noexcept { +simdjson_inline simdjson_result value_iterator::type() const noexcept { switch (*peek_start()) { case '{': return json_type::object; @@ -29442,23 +28873,23 @@ simdjson_really_inline simdjson_result value_iterator::type() const n } } -simdjson_really_inline token_position value_iterator::start_position() const noexcept { +simdjson_inline token_position value_iterator::start_position() const noexcept { return _start_position; } -simdjson_really_inline token_position value_iterator::position() const noexcept { +simdjson_inline 
token_position value_iterator::position() const noexcept { return _json_iter->position(); } -simdjson_really_inline token_position value_iterator::end_position() const noexcept { +simdjson_inline token_position value_iterator::end_position() const noexcept { return _json_iter->end_position(); } -simdjson_really_inline token_position value_iterator::last_position() const noexcept { +simdjson_inline token_position value_iterator::last_position() const noexcept { return _json_iter->last_position(); } -simdjson_really_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { return _json_iter->report_error(error, message); } @@ -29468,9 +28899,9 @@ simdjson_really_inline error_code value_iterator::report_error(error_code error, namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -29480,21 +28911,21 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} -simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { +simdjson_inline simdjson_result array_iterator::operator*() noexcept { if (iter.error()) { iter.abandon(); return iter.error(); } return 
value(iter.child()); } -simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { return !(*this != other); } -simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { return iter.is_open(); } -simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { +simdjson_inline array_iterator &array_iterator::operator++() noexcept { error_code error; // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. @@ -29510,31 +28941,31 @@ simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value ) noexcept : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) { } -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return 
first == other.first; } -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++(first); @@ -29552,11 +28983,11 @@ namespace ondemand { // object_iterator // -simdjson_really_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept : iter{_iter} {} -simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { +simdjson_inline simdjson_result object_iterator::operator*() noexcept { error_code error = iter.error(); if (error) { iter.abandon(); return error; } auto result = field::start(iter); @@ -29565,14 +28996,16 @@ simdjson_really_inline simdjson_result object_iterator::operator*() noexc if (result.error()) { iter.abandon(); } return result; } -simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { return !(*this != other); } -simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { return iter.is_open(); } -simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { // TODO this is a safety rail ... 
users should exit loops as soon as they receive an error. // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error @@ -29584,6 +29017,7 @@ simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { if ((error = iter.has_next_field().get(has_value) )) { return *this; }; return *this; } +SIMDJSON_POP_DISABLE_WARNINGS // // ### Live States @@ -29632,34 +29066,34 @@ simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value ) noexcept : implementation_simdjson_result_base(std::forward(value)) { first.iter.assert_is_valid(); } -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base({}, error) { } -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { if (error()) { return error(); } return *first; } // If we're iterating and there is an error, return the error once. -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return !error(); } return first == other.first; } // If we're iterating and there is an error, return the error once. 
-simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { if (!first.iter.is_valid()) { return error(); } return first != other.first; } // Checks for ']' and ',' -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { // Clear the error if there is one, so we don't yield it twice if (error()) { second = SUCCESS; return *this; } ++first; @@ -29711,45 +29145,45 @@ namespace ondemand { // error == SUCCESS. // -simdjson_really_inline array::array(const value_iterator &_iter) noexcept +simdjson_inline array::array(const value_iterator &_iter) noexcept : iter{_iter} { } -simdjson_really_inline simdjson_result array::start(value_iterator &iter) noexcept { +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { // We don't need to know if the array is empty to start iteration, but we do want to know if there // is an error--thus `simdjson_unused`. 
simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_array().get(has_value) ); return array(iter); } -simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { simdjson_unused bool has_value; SIMDJSON_TRY( iter.start_root_array().get(has_value) ); return array(iter); } -simdjson_really_inline simdjson_result array::started(value_iterator &iter) noexcept { +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { bool has_value; SIMDJSON_TRY(iter.started_array().get(has_value)); return array(iter); } -simdjson_really_inline simdjson_result array::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return array_iterator(iter); } -simdjson_really_inline simdjson_result array::end() noexcept { +simdjson_inline simdjson_result array::end() noexcept { return array_iterator(iter); } -simdjson_really_inline error_code array::consume() noexcept { +simdjson_inline error_code array::consume() noexcept { auto error = iter.json_iter().skip_child(iter.depth()-1); if(error) { iter.abandon(); } return error; } -simdjson_really_inline simdjson_result array::raw_json() noexcept { +simdjson_inline simdjson_result array::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } @@ -29760,8 +29194,9 @@ simdjson_really_inline simdjson_result array::raw_json() noexc return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } +SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_really_inline simdjson_result array::count_elements() & noexcept { +simdjson_inline simdjson_result array::count_elements() & noexcept { size_t count{0}; // Important: we 
do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } @@ -29772,8 +29207,9 @@ simdjson_really_inline simdjson_result array::count_elements() & noexcep iter.reset_array(); return count; } +SIMDJSON_POP_DISABLE_WARNINGS -simdjson_really_inline simdjson_result array::is_empty() & noexcept { +simdjson_inline simdjson_result array::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_array().get(is_not_empty); if(error) { return error; } @@ -29820,7 +29256,7 @@ inline simdjson_result array::at_pointer(std::string_view json_pointer) n return child; } -simdjson_really_inline simdjson_result array::at(size_t index) noexcept { +simdjson_inline simdjson_result array::at(size_t index) noexcept { size_t i = 0; for (auto value : *this) { if (i == index) { return value; } @@ -29835,7 +29271,7 @@ simdjson_really_inline simdjson_result array::at(size_t index) noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value ) noexcept : implementation_simdjson_result_base( @@ -29843,34 +29279,34 @@ simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } 
-simdjson_really_inline simdjson_result simdjson_result::is_empty() & noexcept { +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { if (error()) { return error(); } return first.is_empty(); } -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -29881,13 +29317,13 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline document::document(ondemand::json_iterator &&_iter) noexcept +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept : iter{std::forward(_iter)} { logger::log_start_value(iter, "document"); } -simdjson_really_inline document document::start(json_iterator &&iter) noexcept { +simdjson_inline document document::start(json_iterator &&iter) noexcept { return document(std::forward(iter)); } @@ -29910,20 +29346,20 @@ inline int32_t document::current_depth() const noexcept { inline bool document::is_alive() noexcept { return iter.is_alive(); } -simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { +simdjson_inline value_iterator document::resume_value_iterator() noexcept { return value_iterator(&iter, 1, iter.root_position()); } -simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { return resume_value_iterator(); } -simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { +simdjson_inline simdjson_result document::start_or_resume_object() 
noexcept { if (iter.at_root()) { return get_object(); } else { return object::resume(resume_value_iterator()); } } -simdjson_really_inline simdjson_result document::get_value() noexcept { +simdjson_inline simdjson_result document::get_value() noexcept { // Make sure we start any arrays or objects before returning, so that start_root_() // gets called. iter.assert_at_document_depth(); @@ -29938,139 +29374,133 @@ simdjson_really_inline simdjson_result document::get_value() noexcept { // return value(get_root_value_iterator()); } } -simdjson_really_inline simdjson_result document::get_array() & noexcept { +simdjson_inline simdjson_result document::get_array() & noexcept { auto value = get_root_value_iterator(); return array::start_root(value); } -simdjson_really_inline simdjson_result document::get_object() & noexcept { +simdjson_inline simdjson_result document::get_object() & noexcept { auto value = get_root_value_iterator(); return object::start_root(value); } -simdjson_really_inline simdjson_result document::get_uint64() noexcept { +simdjson_inline simdjson_result document::get_uint64() noexcept { return get_root_value_iterator().get_root_uint64(); } -simdjson_really_inline simdjson_result document::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { return get_root_value_iterator().get_root_uint64_in_string(); } -simdjson_really_inline simdjson_result document::get_int64() noexcept { +simdjson_inline simdjson_result document::get_int64() noexcept { return get_root_value_iterator().get_root_int64(); } -simdjson_really_inline simdjson_result document::get_int64_in_string() noexcept { +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { return get_root_value_iterator().get_root_int64_in_string(); } -simdjson_really_inline simdjson_result document::get_double() noexcept { +simdjson_inline simdjson_result document::get_double() noexcept { return 
get_root_value_iterator().get_root_double(); } -simdjson_really_inline simdjson_result document::get_double_in_string() noexcept { +simdjson_inline simdjson_result document::get_double_in_string() noexcept { return get_root_value_iterator().get_root_double_in_string(); } -simdjson_really_inline simdjson_result document::get_string() noexcept { +simdjson_inline simdjson_result document::get_string() noexcept { return get_root_value_iterator().get_root_string(); } -simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { return get_root_value_iterator().get_root_raw_json_string(); } -simdjson_really_inline simdjson_result document::get_bool() noexcept { +simdjson_inline simdjson_result document::get_bool() noexcept { return get_root_value_iterator().get_root_bool(); } -simdjson_really_inline bool document::is_null() noexcept { +simdjson_inline simdjson_result document::is_null() noexcept { return get_root_value_iterator().is_root_null(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_value(); } - -template<> 
simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_really_inline error_code document::get(T &out) & noexcept { +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(); } +template<> simdjson_inline simdjson_result 
document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { return get().get(out); } -template simdjson_really_inline error_code document::get(T &out) && noexcept { +template simdjson_inline error_code document::get(T &out) && noexcept { return std::forward(*this).get().get(out); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } -simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } -simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } -simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } -simdjson_really_inline document::operator value() noexcept(false) { return get_value(); } +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator 
int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } #endif -simdjson_really_inline simdjson_result document::count_elements() & noexcept { +simdjson_inline simdjson_result document::count_elements() & noexcept { auto a = get_array(); simdjson_result answer = a.count_elements(); /* If there was an array, we are now left pointing at its first element. */ - if(answer.error() == SUCCESS) { - iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } + if(answer.error() == SUCCESS) { rewind(); } return answer; } -simdjson_really_inline simdjson_result document::count_fields() & noexcept { +simdjson_inline simdjson_result document::count_fields() & noexcept { auto a = get_object(); simdjson_result answer = a.count_fields(); - /* If there was an array, we are now left pointing at its first element. */ - if(answer.error() == SUCCESS) { - iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } return answer; } -simdjson_really_inline simdjson_result document::at(size_t index) & noexcept { +simdjson_inline simdjson_result document::at(size_t index) & noexcept { auto a = get_array(); return a.at(index); } -simdjson_really_inline simdjson_result document::begin() & noexcept { +simdjson_inline simdjson_result document::begin() & noexcept { return get_array().begin(); } -simdjson_really_inline simdjson_result document::end() & noexcept { +simdjson_inline simdjson_result document::end() & noexcept { return {}; } -simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { return start_or_resume_object().find_field(key); } -simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { return start_or_resume_object().find_field(key); } -simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { return start_or_resume_object()[key]; } -simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { return start_or_resume_object()[key]; } 
-simdjson_really_inline error_code document::consume() noexcept { +simdjson_inline error_code document::consume() noexcept { auto error = iter.skip_child(0); if(error) { iter.abandon(); } return error; } -simdjson_really_inline simdjson_result document::raw_json() noexcept { +simdjson_inline simdjson_result document::raw_json() noexcept { auto _iter = get_root_value_iterator(); const uint8_t * starting_point{_iter.peek_start()}; auto error = consume(); @@ -30082,40 +29512,40 @@ simdjson_really_inline simdjson_result document::raw_json() no return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_really_inline simdjson_result document::type() noexcept { +simdjson_inline simdjson_result document::type() noexcept { return get_root_value_iterator().type(); } -simdjson_really_inline simdjson_result document::is_scalar() noexcept { +simdjson_inline simdjson_result document::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! 
((this_type == json_type::array) || (this_type == json_type::object)); } -simdjson_really_inline bool document::is_negative() noexcept { +simdjson_inline bool document::is_negative() noexcept { return get_root_value_iterator().is_root_negative(); } -simdjson_really_inline simdjson_result document::is_integer() noexcept { +simdjson_inline simdjson_result document::is_integer() noexcept { return get_root_value_iterator().is_root_integer(); } -simdjson_really_inline simdjson_result document::get_number_type() noexcept { +simdjson_inline simdjson_result document::get_number_type() noexcept { return get_root_value_iterator().get_root_number_type(); } -simdjson_really_inline simdjson_result document::get_number() noexcept { +simdjson_inline simdjson_result document::get_number() noexcept { return get_root_value_iterator().get_root_number(); } -simdjson_really_inline simdjson_result document::raw_json_token() noexcept { +simdjson_inline simdjson_result document::raw_json_token() noexcept { auto _iter = get_root_value_iterator(); return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); } -simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { rewind(); // Rewind the document each time at_pointer is called if (json_pointer.empty()) { return this->get_value(); @@ -30139,7 +29569,7 @@ simdjson_really_inline simdjson_result document::at_pointer(std::string_v namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value ) noexcept : implementation_simdjson_result_base( @@ -30147,7 +29577,7 @@ simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base( @@ -30155,216 
+29585,216 @@ simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_really_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result 
simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_really_inline 
simdjson_result simdjson_result::get_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_really_inline bool simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } template -simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { +simdjson_inline simdjson_result simdjson_result::get() & noexcept { if (error()) { return error(); } return first.get(); } template -simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { +simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first).get(); } template -simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { if (error()) { return error(); } return first.get(out); } template -simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { if (error()) { return error(); } return std::forward(first).get(out); } -template<> simdjson_really_inline simdjson_result 
simdjson_result::get() & noexcept = delete; -template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { if (error()) { return error(); } return std::forward(first); } -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { if (error()) { return error(); } out = std::forward(first); return SUCCESS; } -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_really_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline bool simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if 
(error()) { return error(); } return first.get_number_type(); } -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -30377,54 +29807,54 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_really_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_really_inline void document_reference::rewind() noexcept { 
doc->rewind(); } -simdjson_really_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_really_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -simdjson_really_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } -simdjson_really_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } -simdjson_really_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } -simdjson_really_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } -simdjson_really_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } -simdjson_really_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } -simdjson_really_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_really_inline bool document_reference::is_null() noexcept { return doc->is_null(); } +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } +simdjson_inline simdjson_result document_reference::get_string() noexcept { return 
doc->get_string(); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->is_null(); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_really_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_really_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } -simdjson_really_inline document_reference::operator int64_t() noexcept(false) { return int64_t(*doc); } -simdjson_really_inline document_reference::operator double() noexcept(false) { return double(*doc); } -simdjson_really_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_really_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } -simdjson_really_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } -simdjson_really_inline document_reference::operator value() noexcept(false) { return value(*doc); } +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return int64_t(*doc); } +simdjson_inline document_reference::operator double() noexcept(false) { return double(*doc); } +simdjson_inline document_reference::operator std::string_view() 
noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } #endif -simdjson_really_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_really_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_really_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_really_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_really_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_really_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_really_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_really_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_really_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_really_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_really_inline simdjson_result document_reference::type() noexcept { return doc->type(); } -simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_really_inline simdjson_result 
document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_really_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_really_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } -simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } -simdjson_really_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } -simdjson_really_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_really_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_really_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_really_inline document_reference::operator document&() const noexcept { return *doc; } +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & 
noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } } // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION @@ -30433,172 +29863,172 @@ simdjson_really_inline document_reference::operator document&() const noexcept { namespace simdjson { -simdjson_really_inline 
simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept : implementation_simdjson_result_base(std::forward(value), error) {} -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_really_inline error_code simdjson_result::rewind() noexcept { +simdjson_inline error_code simdjson_result::rewind() noexcept { if (error()) { return error(); } first.rewind(); return SUCCESS; } -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { return {}; } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { 
+simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { if (error()) { return error(); } return first.get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { 
if (error()) { return error(); } return first.get_int64(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { if (error()) { return error(); } return first.get_value(); } -simdjson_really_inline bool simdjson_result::is_null() noexcept { +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { if (error()) { return error(); } return first.is_null(); } -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_really_inline bool simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } 
-simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator std::string_view() 
noexcept(false) { +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -30611,24 +30041,24 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline value::value(const value_iterator &_iter) noexcept +simdjson_inline value::value(const value_iterator &_iter) noexcept : 
iter{_iter} { } -simdjson_really_inline value value::start(const value_iterator &iter) noexcept { +simdjson_inline value value::start(const value_iterator &iter) noexcept { return iter; } -simdjson_really_inline value value::resume(const value_iterator &iter) noexcept { +simdjson_inline value value::resume(const value_iterator &iter) noexcept { return iter; } -simdjson_really_inline simdjson_result value::get_array() noexcept { +simdjson_inline simdjson_result value::get_array() noexcept { return array::start(iter); } -simdjson_really_inline simdjson_result value::get_object() noexcept { +simdjson_inline simdjson_result value::get_object() noexcept { return object::start(iter); } -simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { if (iter.at_start()) { return get_object(); } else { @@ -30636,85 +30066,84 @@ simdjson_really_inline simdjson_result value::start_or_resume_object() n } } -simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { return iter.get_raw_json_string(); } -simdjson_really_inline simdjson_result value::get_string() noexcept { +simdjson_inline simdjson_result value::get_string() noexcept { return iter.get_string(); } -simdjson_really_inline simdjson_result value::get_double() noexcept { +simdjson_inline simdjson_result value::get_double() noexcept { return iter.get_double(); } -simdjson_really_inline simdjson_result value::get_double_in_string() noexcept { +simdjson_inline simdjson_result value::get_double_in_string() noexcept { return iter.get_double_in_string(); } -simdjson_really_inline simdjson_result value::get_uint64() noexcept { +simdjson_inline simdjson_result value::get_uint64() noexcept { return iter.get_uint64(); } -simdjson_really_inline simdjson_result value::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result 
value::get_uint64_in_string() noexcept { return iter.get_uint64_in_string(); } -simdjson_really_inline simdjson_result value::get_int64() noexcept { +simdjson_inline simdjson_result value::get_int64() noexcept { return iter.get_int64(); } -simdjson_really_inline simdjson_result value::get_int64_in_string() noexcept { +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { return iter.get_int64_in_string(); } -simdjson_really_inline simdjson_result value::get_bool() noexcept { +simdjson_inline simdjson_result value::get_bool() noexcept { return iter.get_bool(); } -simdjson_really_inline bool value::is_null() noexcept { +simdjson_inline simdjson_result value::is_null() noexcept { return iter.is_null(); } - -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_really_inline error_code value::get(T &out) noexcept { +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> 
simdjson_inline simdjson_result value::get() noexcept { return get_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { return get().get(out); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline value::operator array() noexcept(false) { +simdjson_inline value::operator array() noexcept(false) { return get_array(); } -simdjson_really_inline value::operator object() noexcept(false) { +simdjson_inline value::operator object() noexcept(false) { return get_object(); } -simdjson_really_inline value::operator uint64_t() noexcept(false) { +simdjson_inline value::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_really_inline value::operator int64_t() noexcept(false) { +simdjson_inline value::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_really_inline value::operator double() noexcept(false) { +simdjson_inline value::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline value::operator std::string_view() noexcept(false) { +simdjson_inline value::operator std::string_view() noexcept(false) { return get_string(); } -simdjson_really_inline value::operator raw_json_string() noexcept(false) { +simdjson_inline value::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_really_inline value::operator bool() noexcept(false) { +simdjson_inline value::operator bool() noexcept(false) { return get_bool(); } #endif -simdjson_really_inline simdjson_result value::begin() & noexcept { +simdjson_inline simdjson_result 
value::begin() & noexcept { return get_array().begin(); } -simdjson_really_inline simdjson_result value::end() & noexcept { +simdjson_inline simdjson_result value::end() & noexcept { return {}; } -simdjson_really_inline simdjson_result value::count_elements() & noexcept { +simdjson_inline simdjson_result value::count_elements() & noexcept { simdjson_result answer; auto a = get_array(); answer = a.count_elements(); @@ -30724,77 +30153,77 @@ simdjson_really_inline simdjson_result value::count_elements() & noexcep iter.move_at_start(); return answer; } -simdjson_really_inline simdjson_result value::count_fields() & noexcept { +simdjson_inline simdjson_result value::count_fields() & noexcept { simdjson_result answer; auto a = get_object(); answer = a.count_fields(); iter.move_at_start(); return answer; } -simdjson_really_inline simdjson_result value::at(size_t index) noexcept { +simdjson_inline simdjson_result value::at(size_t index) noexcept { auto a = get_array(); return a.at(index); } -simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { return start_or_resume_object().find_field(key); } -simdjson_really_inline simdjson_result value::find_field(const char *key) noexcept { +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { return start_or_resume_object().find_field(key); } -simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_really_inline simdjson_result value::find_field_unordered(const char *key) noexcept { +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { return start_or_resume_object().find_field_unordered(key); } -simdjson_really_inline simdjson_result 
value::operator[](std::string_view key) noexcept { +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { return start_or_resume_object()[key]; } -simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { return start_or_resume_object()[key]; } -simdjson_really_inline simdjson_result value::type() noexcept { +simdjson_inline simdjson_result value::type() noexcept { return iter.type(); } -simdjson_really_inline simdjson_result value::is_scalar() noexcept { +simdjson_inline simdjson_result value::is_scalar() noexcept { json_type this_type; auto error = type().get(this_type); if(error) { return error; } return ! ((this_type == json_type::array) || (this_type == json_type::object)); } -simdjson_really_inline bool value::is_negative() noexcept { +simdjson_inline bool value::is_negative() noexcept { return iter.is_negative(); } -simdjson_really_inline simdjson_result value::is_integer() noexcept { +simdjson_inline simdjson_result value::is_integer() noexcept { return iter.is_integer(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number_type() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { return iter.get_number_type(); } -simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number() noexcept { +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { return iter.get_number(); } -simdjson_really_inline std::string_view value::raw_json_token() noexcept { +simdjson_inline std::string_view value::raw_json_token() noexcept { return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); } -simdjson_really_inline simdjson_result value::current_location() noexcept { +simdjson_inline simdjson_result value::current_location() noexcept { return iter.json_iter().current_location(); } 
-simdjson_really_inline int32_t value::current_depth() const noexcept{ +simdjson_inline int32_t value::current_depth() const noexcept{ return iter.json_iter().depth(); } -simdjson_really_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { json_type t; SIMDJSON_TRY(type().get(t)); switch (t) @@ -30814,7 +30243,7 @@ simdjson_really_inline simdjson_result value::at_pointer(std::string_view namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value ) noexcept : implementation_simdjson_result_base( @@ -30822,203 +30251,203 @@ simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { if (error()) { return error(); } return first.count_elements(); } -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { if (error()) { return error(); } return first.at(index); } -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { +simdjson_inline simdjson_result simdjson_result::end() & noexcept { if 
(error()) { return error(); } return {}; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { if (error()) { return error(); } return first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { if (error()) { return error(); } return first.get_array(); } -simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { if (error()) { 
return error(); } return first.get_object(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { if (error()) { return error(); } return first.get_uint64(); } -simdjson_really_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { if (error()) { return error(); } return first.get_uint64_in_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { if (error()) { return error(); } return first.get_int64(); } -simdjson_really_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { if (error()) { return error(); } return first.get_int64_in_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { if (error()) { return error(); } return first.get_double(); } -simdjson_really_inline simdjson_result simdjson_result::get_double_in_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { if (error()) { return error(); } return first.get_double_in_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { if (error()) { return error(); } return first.get_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { if (error()) { return error(); } return first.get_raw_json_string(); } -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { +simdjson_inline 
simdjson_result simdjson_result::get_bool() noexcept { if (error()) { return error(); } return first.get_bool(); } -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return false; } +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } return first.is_null(); } -template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { +template simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return first.get(); } -template simdjson_really_inline error_code simdjson_result::get(T &out) noexcept { +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { if (error()) { return error(); } return first.get(out); } -template<> simdjson_really_inline simdjson_result simdjson_result::get() noexcept { +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { if (error()) { return error(); } return std::move(first); } -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { if (error()) { return error(); } out = first; return SUCCESS; } -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { +simdjson_inline simdjson_result simdjson_result::type() noexcept { if (error()) { return error(); } return first.type(); } -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { if (error()) { return error(); } return first.is_scalar(); } -simdjson_really_inline simdjson_result simdjson_result::is_negative() noexcept { +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { if (error()) { return error(); } return first.is_negative(); } -simdjson_really_inline simdjson_result 
simdjson_result::is_integer() noexcept { +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { if (error()) { return error(); } return first.is_integer(); } -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { if (error()) { return error(); } return first.get_number_type(); } -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { if (error()) { return error(); } return first.get_number(); } #if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator double() noexcept(false) { +simdjson_inline simdjson_result::operator double() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { +simdjson_inline 
simdjson_result::operator std::string_view() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { +simdjson_inline simdjson_result::operator bool() noexcept(false) { if (error()) { throw simdjson_error(error()); } return first; } #endif -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { if (error()) { return error(); } return first.raw_json_token(); } -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { if (error()) { return error(); } return first.current_location(); } -simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { if (error()) { return error(); } return first.current_depth(); } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -31031,41 +30460,41 @@ namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { // clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_really_inline field::field() noexcept : std::pair() {} +simdjson_inline field::field() noexcept : std::pair() {} -simdjson_really_inline field::field(raw_json_string key, 
ondemand::value &&value) noexcept +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept : std::pair(key, std::forward(value)) { } -simdjson_really_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { raw_json_string key; SIMDJSON_TRY( parent_iter.field_key().get(key) ); SIMDJSON_TRY( parent_iter.field_value() ); return field::start(parent_iter, key); } -simdjson_really_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { return field(key, parent_iter.child()); } -simdjson_really_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.string_buf_loc()); + simdjson_result answer = first.unescape(second.iter.json_iter()); first.consume(); return answer; } -simdjson_really_inline raw_json_string field::key() const noexcept { +simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
return first; } -simdjson_really_inline value &field::value() & noexcept { +simdjson_inline value &field::value() & noexcept { return second; } -simdjson_really_inline value field::value() && noexcept { +simdjson_inline value field::value() && noexcept { return std::forward(*this).second; } @@ -31075,7 +30504,7 @@ simdjson_really_inline value field::value() && noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value ) noexcept : implementation_simdjson_result_base( @@ -31083,22 +30512,22 @@ simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result simdjson_result::key() noexcept { +simdjson_inline simdjson_result simdjson_result::key() noexcept { if (error()) { return error(); } return first.key(); } -simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { +simdjson_inline simdjson_result simdjson_result::unescaped_key() noexcept { if (error()) { return error(); } return first.unescaped_key(); } -simdjson_really_inline simdjson_result simdjson_result::value() noexcept { +simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); } @@ -31110,46 +30539,46 @@ namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } -simdjson_really_inline simdjson_result 
object::find_field_unordered(const std::string_view key) && noexcept { +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { return find_field_unordered(key); } -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { return std::forward(*this).find_field_unordered(key); } -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { bool has_value; SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); if (!has_value) { return NO_SUCH_FIELD; } return value(iter.child()); } -simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_object().error() ); return object(iter); } -simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.start_root_object().error() ); return object(iter); } -simdjson_really_inline 
error_code object::consume() noexcept { +simdjson_inline error_code object::consume() noexcept { if(iter.is_at_key()) { /** * whenever you are pointing at a key, calling skip_child() is @@ -31175,7 +30604,7 @@ simdjson_really_inline error_code object::consume() noexcept { return error_skip; } -simdjson_really_inline simdjson_result object::raw_json() noexcept { +simdjson_inline simdjson_result object::raw_json() noexcept { const uint8_t * starting_point{iter.peek_start()}; auto error = consume(); if(error) { return error; } @@ -31183,27 +30612,27 @@ simdjson_really_inline simdjson_result object::raw_json() noex return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); } -simdjson_really_inline simdjson_result object::started(value_iterator &iter) noexcept { +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { SIMDJSON_TRY( iter.started_object().error() ); return object(iter); } -simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { +simdjson_inline object object::resume(const value_iterator &iter) noexcept { return iter; } -simdjson_really_inline object::object(const value_iterator &_iter) noexcept +simdjson_inline object::object(const value_iterator &_iter) noexcept : iter{_iter} { } -simdjson_really_inline simdjson_result object::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } #endif return object_iterator(iter); } -simdjson_really_inline simdjson_result object::end() noexcept { +simdjson_inline simdjson_result object::end() noexcept { return object_iterator(iter); } @@ -31247,7 +30676,7 @@ inline simdjson_result object::at_pointer(std::string_view json_pointer) return child; } -simdjson_really_inline simdjson_result object::count_fields() & noexcept { +simdjson_inline simdjson_result 
object::count_fields() & noexcept { size_t count{0}; // Important: we do not consume any of the values. for(simdjson_unused auto v : *this) { count++; } @@ -31259,14 +30688,14 @@ simdjson_really_inline simdjson_result object::count_fields() & noexcept return count; } -simdjson_really_inline simdjson_result object::is_empty() & noexcept { +simdjson_inline simdjson_result object::is_empty() & noexcept { bool is_not_empty; auto error = iter.reset_object().get(is_not_empty); if(error) { return error; } return !is_not_empty; } -simdjson_really_inline simdjson_result object::reset() & noexcept { +simdjson_inline simdjson_result object::reset() & noexcept { return iter.reset_object(); } @@ -31276,45 +30705,45 @@ simdjson_really_inline simdjson_result object::reset() & noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { +simdjson_inline simdjson_result simdjson_result::begin() noexcept { if (error()) { return error(); } return first.begin(); } -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { +simdjson_inline simdjson_result simdjson_result::end() noexcept { if (error()) { return error(); } return first.end(); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { if (error()) { return error(); } return 
first.find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field_unordered(key); } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { if (error()) { return error(); } return first[key]; } -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first)[key]; } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { if (error()) { return error(); } return first.find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { if (error()) { return error(); } return std::forward(first).find_field(key); } -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { if (error()) { return error(); } return first.at_pointer(json_pointer); } @@ -31329,7 +30758,7 @@ inline simdjson_result simdjson_result simdjson_result::count_fields() & noexcept { +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { if (error()) { return error(); } return first.count_fields(); } @@ -31341,11 +30770,11 @@ namespace simdjson { 
namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { -simdjson_really_inline parser::parser(size_t max_capacity) noexcept +simdjson_inline parser::parser(size_t max_capacity) noexcept : _max_capacity{max_capacity} { } -simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { if (new_capacity > max_capacity()) { return CAPACITY; } if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } @@ -31353,7 +30782,7 @@ simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t n _capacity = 0; size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS +#if SIMDJSON_DEVELOPMENT_CHECKS start_positions.reset(new (std::nothrow) token_position[new_max_depth]); #endif if (implementation) { @@ -31367,7 +30796,7 @@ simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t n return SUCCESS; } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } // Allocate if needed @@ -31380,37 +30809,37 @@ simdjson_warn_unused simdjson_really_inline simdjson_result parser::it return document::start({ reinterpret_cast(json.data()), this }); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, 
allocated)); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { return iterate(padded_string_view(json, len, allocated)); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { return iterate(padded_string_view(json, allocated)); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string &json) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { return iterate(padded_string_view(json)); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); padded_string_view json = result.value_unsafe(); return iterate(json); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception SIMDJSON_TRY( result.error() ); const padded_string &json = result.value_unsafe(); return iterate(json); } -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { +simdjson_warn_unused simdjson_inline 
simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } // Allocate if needed @@ -31437,34 +30866,41 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size); } -simdjson_really_inline size_t parser::capacity() const noexcept { +simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_really_inline size_t parser::max_capacity() const noexcept { +simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_really_inline size_t parser::max_depth() const noexcept { +simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } -simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - size_t MINIMAL_DOCUMENT_CAPACITY = 32; - if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { _max_capacity = max_capacity; } else { - _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; } } +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + } // namespace ondemand } // namespace SIMDJSON_BUILTIN_IMPLEMENTATION } // namespace simdjson namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline 
simdjson_result::simdjson_result(error_code error) noexcept +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept : implementation_simdjson_result_base(error) {} } // namespace simdjson @@ -31552,7 +30988,7 @@ inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t nex #endif // SIMDJSON_THREADS_ENABLED -simdjson_really_inline document_stream::document_stream( +simdjson_inline document_stream::document_stream( ondemand::parser &_parser, const uint8_t *_buf, size_t _len, @@ -31574,7 +31010,7 @@ simdjson_really_inline document_stream::document_stream( #endif } -simdjson_really_inline document_stream::document_stream() noexcept +simdjson_inline document_stream::document_stream() noexcept : parser{nullptr}, buf{nullptr}, len{0}, @@ -31586,7 +31022,7 @@ simdjson_really_inline document_stream::document_stream() noexcept { } -simdjson_really_inline document_stream::~document_stream() noexcept +simdjson_inline document_stream::~document_stream() noexcept { #ifdef SIMDJSON_THREADS_ENABLED worker.reset(); @@ -31602,20 +31038,20 @@ inline size_t document_stream::truncated_bytes() const noexcept { return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; } -simdjson_really_inline document_stream::iterator::iterator() noexcept +simdjson_inline document_stream::iterator::iterator() noexcept : stream{nullptr}, finished{true} { } -simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept : stream{_stream}, finished{is_end} { } -simdjson_really_inline simdjson_result document_stream::iterator::operator*() noexcept { +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { //if(stream->error) { return stream->error; } return 
simdjson_result(stream->doc, stream->error); } -simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { // If there is an error, then we want the iterator // to be finished, no matter what. (E.g., we do not // keep generating documents with errors, or go beyond @@ -31642,17 +31078,17 @@ simdjson_really_inline document_stream::iterator& document_stream::iterator::ope return *this; } -simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } -simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { +simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. 
return iterator(this, error == EMPTY); } -simdjson_really_inline document_stream::iterator document_stream::end() noexcept { +simdjson_inline document_stream::iterator document_stream::end() noexcept { return iterator(this, true); } @@ -31782,11 +31218,11 @@ inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch } } -simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { return stream->doc_index; } -simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { auto depth = stream->doc.iter.depth(); auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); @@ -31861,13 +31297,13 @@ inline void document_stream::start_stage1_thread() noexcept { namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( error_code error ) noexcept : implementation_simdjson_result_base(error) { } -simdjson_really_inline simdjson_result::simdjson_result( +simdjson_inline simdjson_result::simdjson_result( SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value ) noexcept : implementation_simdjson_result_base( diff --git a/simdjson/simdjson_source/README b/simdjson/simdjson_source/README deleted file mode 100644 index 499cab1..0000000 --- a/simdjson/simdjson_source/README +++ /dev/null @@ -1,7 +0,0 @@ -singleheader/ files from the simdjson library. These are not used directly. -Patch-applied files are used instead, in the simdjson/ directory. 
- -To update: - * Copy from simdjson: https://github.com/simdjson/simdjson/tree/master/singleheader - * Run simdjson/patches/apply.sh - * Increase pysimdjson version in setup.py diff --git a/simdjson/simdjson_source/simdjson.cpp b/simdjson/simdjson_source/simdjson.cpp deleted file mode 100644 index c98c4b1..0000000 --- a/simdjson/simdjson_source/simdjson.cpp +++ /dev/null @@ -1,15051 +0,0 @@ -/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ -/* begin file src/simdjson.cpp */ -#include "simdjson.h" - -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_UNDESIRED_WARNINGS - -/* begin file src/to_chars.cpp */ -#include -#include -#include -#include - -namespace simdjson { -namespace internal { -/*! -implements the Grisu2 algorithm for binary to decimal floating-point -conversion. -Adapted from JSON for Modern C++ - -This implementation is a slightly modified version of the reference -implementation which may be obtained from -http://florian.loitsch.com/publications (bench.tar.gz). -The code is distributed under the MIT license, Copyright (c) 2009 Florian -Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing -Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the -ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation, -PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and -Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming -Language Design and Implementation, PLDI 1996 -*/ -namespace dtoa_impl { - -template -Target reinterpret_bits(const Source source) { - static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); - - Target target; - std::memcpy(&target, &source, sizeof(Source)); - return target; -} - -struct diyfp // f * 2^e -{ - static constexpr int kPrecision = 64; // = q - - std::uint64_t f = 0; - int e = 0; - - constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} - - /*! 
- @brief returns x - y - @pre x.e == y.e and x.f >= y.f - */ - static diyfp sub(const diyfp &x, const diyfp &y) noexcept { - - return {x.f - y.f, x.e}; - } - - /*! - @brief returns x * y - @note The result is rounded. (Only the upper q bits are returned.) - */ - static diyfp mul(const diyfp &x, const diyfp &y) noexcept { - static_assert(kPrecision == 64, "internal error"); - - // Computes: - // f = round((x.f * y.f) / 2^q) - // e = x.e + y.e + q - - // Emulate the 64-bit * 64-bit multiplication: - // - // p = u * v - // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) - // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + - // 2^64 (u_hi v_hi ) = (p0 ) + 2^32 ((p1 ) + (p2 )) - // + 2^64 (p3 ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + - // 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) = - // (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + - // p2_hi + p3) = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) + - // 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) - // - // (Since Q might be larger than 2^32 - 1) - // - // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) - // - // (Q_hi + H does not overflow a 64-bit int) - // - // = p_lo + 2^64 p_hi - - const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; - const std::uint64_t u_hi = x.f >> 32u; - const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; - const std::uint64_t v_hi = y.f >> 32u; - - const std::uint64_t p0 = u_lo * v_lo; - const std::uint64_t p1 = u_lo * v_hi; - const std::uint64_t p2 = u_hi * v_lo; - const std::uint64_t p3 = u_hi * v_hi; - - const std::uint64_t p0_hi = p0 >> 32u; - const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; - const std::uint64_t p1_hi = p1 >> 32u; - const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; - const std::uint64_t p2_hi = p2 >> 32u; - - std::uint64_t Q = p0_hi + p1_lo + p2_lo; - - // The full product might now be computed as - // - // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) - // p_lo = p0_lo + (Q << 32) - // - // But in this particular case here, the full p_lo is not required. 
- // Effectively we only need to add the highest bit in p_lo to p_hi (and - // Q_hi + 1 does not overflow). - - Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up - - const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); - - return {h, x.e + y.e + 64}; - } - - /*! - @brief normalize x such that the significand is >= 2^(q-1) - @pre x.f != 0 - */ - static diyfp normalize(diyfp x) noexcept { - - while ((x.f >> 63u) == 0) { - x.f <<= 1u; - x.e--; - } - - return x; - } - - /*! - @brief normalize x such that the result has the exponent E - @pre e >= x.e and the upper e - x.e bits of x.f must be zero. - */ - static diyfp normalize_to(const diyfp &x, - const int target_exponent) noexcept { - const int delta = x.e - target_exponent; - - return {x.f << delta, target_exponent}; - } -}; - -struct boundaries { - diyfp w; - diyfp minus; - diyfp plus; -}; - -/*! -Compute the (normalized) diyfp representing the input number 'value' and its -boundaries. -@pre value must be finite and positive -*/ -template boundaries compute_boundaries(FloatType value) { - - // Convert the IEEE representation into a diyfp. 
- // - // If v is denormal: - // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) - // If v is normalized: - // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) - - static_assert(std::numeric_limits::is_iec559, - "internal error: dtoa_short requires an IEEE-754 " - "floating-point implementation"); - - constexpr int kPrecision = - std::numeric_limits::digits; // = p (includes the hidden bit) - constexpr int kBias = - std::numeric_limits::max_exponent - 1 + (kPrecision - 1); - constexpr int kMinExp = 1 - kBias; - constexpr std::uint64_t kHiddenBit = std::uint64_t{1} - << (kPrecision - 1); // = 2^(p-1) - - using bits_type = typename std::conditional::type; - - const std::uint64_t bits = reinterpret_bits(value); - const std::uint64_t E = bits >> (kPrecision - 1); - const std::uint64_t F = bits & (kHiddenBit - 1); - - const bool is_denormal = E == 0; - const diyfp v = is_denormal - ? diyfp(F, kMinExp) - : diyfp(F + kHiddenBit, static_cast(E) - kBias); - - // Compute the boundaries m- and m+ of the floating-point value - // v = f * 2^e. - // - // Determine v- and v+, the floating-point predecessor and successor if v, - // respectively. - // - // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) - // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) - // - // v+ = v + 2^e - // - // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ - // between m- and m+ round to v, regardless of how the input rounding - // algorithm breaks ties. - // - // ---+-------------+-------------+-------------+-------------+--- (A) - // v- m- v m+ v+ - // - // -----------------+------+------+-------------+-------------+--- (B) - // v- m- v m+ v+ - - const bool lower_boundary_is_closer = F == 0 && E > 1; - const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); - const diyfp m_minus = lower_boundary_is_closer - ? diyfp(4 * v.f - 1, v.e - 2) // (B) - : diyfp(2 * v.f - 1, v.e - 1); // (A) - - // Determine the normalized w+ = m+. 
- const diyfp w_plus = diyfp::normalize(m_plus); - - // Determine w- = m- such that e_(w-) = e_(w+). - const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); - - return {diyfp::normalize(v), w_minus, w_plus}; -} - -// Given normalized diyfp w, Grisu needs to find a (normalized) cached -// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies -// within a certain range [alpha, gamma] (Definition 3.2 from [1]) -// -// alpha <= e = e_c + e_w + q <= gamma -// -// or -// -// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q -// <= f_c * f_w * 2^gamma -// -// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies -// -// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma -// -// or -// -// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) -// -// The choice of (alpha,gamma) determines the size of the table and the form of -// the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well -// in practice: -// -// The idea is to cut the number c * w = f * 2^e into two parts, which can be -// processed independently: An integral part p1, and a fractional part p2: -// -// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e -// = (f div 2^-e) + (f mod 2^-e) * 2^e -// = p1 + p2 * 2^e -// -// The conversion of p1 into decimal form requires a series of divisions and -// modulos by (a power of) 10. These operations are faster for 32-bit than for -// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be -// achieved by choosing -// -// -e >= 32 or e <= -32 := gamma -// -// In order to convert the fractional part -// -// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... -// -// into decimal form, the fraction is repeatedly multiplied by 10 and the digits -// d[-i] are extracted in order: -// -// (10 * p2) div 2^-e = d[-1] -// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... -// -// The multiplication by 10 must not overflow. 
It is sufficient to choose -// -// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. -// -// Since p2 = f mod 2^-e < 2^-e, -// -// -e <= 60 or e >= -60 := alpha - -constexpr int kAlpha = -60; -constexpr int kGamma = -32; - -struct cached_power // c = f * 2^e ~= 10^k -{ - std::uint64_t f; - int e; - int k; -}; - -/*! -For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached -power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c -satisfies (Definition 3.2 from [1]) - alpha <= e_c + e + q <= gamma. -*/ -inline cached_power get_cached_power_for_binary_exponent(int e) { - // Now - // - // alpha <= e_c + e + q <= gamma (1) - // ==> f_c * 2^alpha <= c * 2^e * 2^q - // - // and since the c's are normalized, 2^(q-1) <= f_c, - // - // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) - // ==> 2^(alpha - e - 1) <= c - // - // If c were an exact power of ten, i.e. c = 10^k, one may determine k as - // - // k = ceil( log_10( 2^(alpha - e - 1) ) ) - // = ceil( (alpha - e - 1) * log_10(2) ) - // - // From the paper: - // "In theory the result of the procedure could be wrong since c is rounded, - // and the computation itself is approximated [...]. In practice, however, - // this simple function is sufficient." - // - // For IEEE double precision floating-point numbers converted into - // normalized diyfp's w = f * 2^e, with q = 64, - // - // e >= -1022 (min IEEE exponent) - // -52 (p - 1) - // -52 (p - 1, possibly normalize denormal IEEE numbers) - // -11 (normalize the diyfp) - // = -1137 - // - // and - // - // e <= +1023 (max IEEE exponent) - // -52 (p - 1) - // -11 (normalize the diyfp) - // = 960 - // - // This binary exponent range [-1137,960] results in a decimal exponent - // range [-307,324]. One does not need to store a cached power for each - // k in this range. For each such k it suffices to find a cached power - // such that the exponent of the product lies in [alpha,gamma]. 
- // This implies that the difference of the decimal exponents of adjacent - // table entries must be less than or equal to - // - // floor( (gamma - alpha) * log_10(2) ) = 8. - // - // (A smaller distance gamma-alpha would require a larger table.) - - // NB: - // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. - - constexpr int kCachedPowersMinDecExp = -300; - constexpr int kCachedPowersDecStep = 8; - - static constexpr std::array kCachedPowers = {{ - {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292}, - {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276}, - {0xD3515C2831559A83, -954, -268}, {0x9D71AC8FADA6C9B5, -927, -260}, - {0xEA9C227723EE8BCB, -901, -252}, {0xAECC49914078536D, -874, -244}, - {0x823C12795DB6CE57, -847, -236}, {0xC21094364DFB5637, -821, -228}, - {0x9096EA6F3848984F, -794, -220}, {0xD77485CB25823AC7, -768, -212}, - {0xA086CFCD97BF97F4, -741, -204}, {0xEF340A98172AACE5, -715, -196}, - {0xB23867FB2A35B28E, -688, -188}, {0x84C8D4DFD2C63F3B, -661, -180}, - {0xC5DD44271AD3CDBA, -635, -172}, {0x936B9FCEBB25C996, -608, -164}, - {0xDBAC6C247D62A584, -582, -156}, {0xA3AB66580D5FDAF6, -555, -148}, - {0xF3E2F893DEC3F126, -529, -140}, {0xB5B5ADA8AAFF80B8, -502, -132}, - {0x87625F056C7C4A8B, -475, -124}, {0xC9BCFF6034C13053, -449, -116}, - {0x964E858C91BA2655, -422, -108}, {0xDFF9772470297EBD, -396, -100}, - {0xA6DFBD9FB8E5B88F, -369, -92}, {0xF8A95FCF88747D94, -343, -84}, - {0xB94470938FA89BCF, -316, -76}, {0x8A08F0F8BF0F156B, -289, -68}, - {0xCDB02555653131B6, -263, -60}, {0x993FE2C6D07B7FAC, -236, -52}, - {0xE45C10C42A2B3B06, -210, -44}, {0xAA242499697392D3, -183, -36}, - {0xFD87B5F28300CA0E, -157, -28}, {0xBCE5086492111AEB, -130, -20}, - {0x8CBCCC096F5088CC, -103, -12}, {0xD1B71758E219652C, -77, -4}, - {0x9C40000000000000, -50, 4}, {0xE8D4A51000000000, -24, 12}, - {0xAD78EBC5AC620000, 3, 20}, {0x813F3978F8940984, 30, 28}, - {0xC097CE7BC90715B3, 56, 36}, {0x8F7E32CE7BEA5C70, 83, 44}, - 
{0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60}, - {0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76}, - {0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92}, - {0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108}, - {0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124}, - {0xB454E4A179DD1877, 375, 132}, {0x865B86925B9BC5C2, 402, 140}, - {0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156}, - {0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172}, - {0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188}, - {0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204}, - {0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220}, - {0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236}, - {0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252}, - {0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268}, - {0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284}, - {0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300}, - {0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316}, - {0x9E19DB92B4E31BA9, 1013, 324}, - }}; - - // This computation gives exactly the same results for k as - // k = ceil((kAlpha - e - 1) * 0.30102999566398114) - // for |e| <= 1500, but doesn't require floating-point operations. - // NB: log_10(2) ~= 78913 / 2^18 - const int f = kAlpha - e - 1; - const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); - - const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / - kCachedPowersDecStep; - - const cached_power cached = kCachedPowers[static_cast(index)]; - - return cached; -} - -/*! -For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. -For n == 0, returns 1 and sets pow10 := 1. 
-*/ -inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) { - // LCOV_EXCL_START - if (n >= 1000000000) { - pow10 = 1000000000; - return 10; - } - // LCOV_EXCL_STOP - else if (n >= 100000000) { - pow10 = 100000000; - return 9; - } else if (n >= 10000000) { - pow10 = 10000000; - return 8; - } else if (n >= 1000000) { - pow10 = 1000000; - return 7; - } else if (n >= 100000) { - pow10 = 100000; - return 6; - } else if (n >= 10000) { - pow10 = 10000; - return 5; - } else if (n >= 1000) { - pow10 = 1000; - return 4; - } else if (n >= 100) { - pow10 = 100; - return 3; - } else if (n >= 10) { - pow10 = 10; - return 2; - } else { - pow10 = 1; - return 1; - } -} - -inline void grisu2_round(char *buf, int len, std::uint64_t dist, - std::uint64_t delta, std::uint64_t rest, - std::uint64_t ten_k) { - - // <--------------------------- delta ----> - // <---- dist ---------> - // --------------[------------------+-------------------]-------------- - // M- w M+ - // - // ten_k - // <------> - // <---- rest ----> - // --------------[------------------+----+--------------]-------------- - // w V - // = buf * 10^k - // - // ten_k represents a unit-in-the-last-place in the decimal representation - // stored in buf. - // Decrement buf by ten_k while this takes buf closer to w. - - // The tests are written in this order to avoid overflow in unsigned - // integer arithmetic. - - while (rest < dist && delta - rest >= ten_k && - (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { - buf[len - 1]--; - rest += ten_k; - } -} - -/*! -Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. -M- and M+ must be normalized and share the same exponent -60 <= e <= -32. 
-*/ -inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent, - diyfp M_minus, diyfp w, diyfp M_plus) { - static_assert(kAlpha >= -60, "internal error"); - static_assert(kGamma <= -32, "internal error"); - - // Generates the digits (and the exponent) of a decimal floating-point - // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's - // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= - // gamma. - // - // <--------------------------- delta ----> - // <---- dist ---------> - // --------------[------------------+-------------------]-------------- - // M- w M+ - // - // Grisu2 generates the digits of M+ from left to right and stops as soon as - // V is in [M-,M+]. - - std::uint64_t delta = - diyfp::sub(M_plus, M_minus) - .f; // (significand of (M+ - M-), implicit exponent is e) - std::uint64_t dist = - diyfp::sub(M_plus, w) - .f; // (significand of (M+ - w ), implicit exponent is e) - - // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): - // - // M+ = f * 2^e - // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e - // = ((p1 ) * 2^-e + (p2 )) * 2^e - // = p1 + p2 * 2^e - - const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); - - auto p1 = static_cast( - M_plus.f >> - -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
- std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e - - // 1) - // - // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] - - std::uint32_t pow10; - const int k = find_largest_pow10(p1, pow10); - - // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) - // - // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) - // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) - // - // M+ = p1 + p2 * 2^e - // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e - // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e - // = d[k-1] * 10^(k-1) + ( rest) * 2^e - // - // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) - // - // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] - // - // but stop as soon as - // - // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e - - int n = k; - while (n > 0) { - // Invariants: - // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) - // pow10 = 10^(n-1) <= p1 < 10^n - // - const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) - const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) - // - // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e - // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) - // - buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d - // - // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) - // - p1 = r; - n--; - // - // M+ = buffer * 10^n + (p1 + p2 * 2^e) - // pow10 = 10^n - // - - // Now check if enough digits have been generated. - // Compute - // - // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e - // - // Note: - // Since rest and delta share the same exponent e, it suffices to - // compare the significands. - const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; - if (rest <= delta) { - // V = buffer * 10^n, with M- <= V <= M+. - - decimal_exponent += n; - - // We may now just stop. But instead look if the buffer could be - // decremented to bring V closer to w. - // - // pow10 = 10^n is now 1 ulp in the decimal representation V. 
- // The rounding procedure works with diyfp's with an implicit - // exponent of e. - // - // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e - // - const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; - grisu2_round(buffer, length, dist, delta, rest, ten_n); - - return; - } - - pow10 /= 10; - // - // pow10 = 10^(n-1) <= p1 < 10^n - // Invariants restored. - } - - // 2) - // - // The digits of the integral part have been generated: - // - // M+ = d[k-1]...d[1]d[0] + p2 * 2^e - // = buffer + p2 * 2^e - // - // Now generate the digits of the fractional part p2 * 2^e. - // - // Note: - // No decimal point is generated: the exponent is adjusted instead. - // - // p2 actually represents the fraction - // - // p2 * 2^e - // = p2 / 2^-e - // = d[-1] / 10^1 + d[-2] / 10^2 + ... - // - // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) - // - // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m - // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) - // - // using - // - // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) - // = ( d) * 2^-e + ( r) - // - // or - // 10^m * p2 * 2^e = d + r * 2^e - // - // i.e. - // - // M+ = buffer + p2 * 2^e - // = buffer + 10^-m * (d + r * 2^e) - // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e - // - // and stop as soon as 10^-m * r * 2^e <= delta * 2^e - - int m = 0; - for (;;) { - // Invariant: - // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) 
- // * 2^e - // = buffer * 10^-m + 10^-m * (p2 ) - // * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e = - // buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + - // (10*p2 mod 2^-e)) * 2^e - // - p2 *= 10; - const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e - const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e - // - // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e - // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) - // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e - // - buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d - // - // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e - // - p2 = r; - m++; - // - // M+ = buffer * 10^-m + 10^-m * p2 * 2^e - // Invariant restored. - - // Check if enough digits have been generated. - // - // 10^-m * p2 * 2^e <= delta * 2^e - // p2 * 2^e <= 10^m * delta * 2^e - // p2 <= 10^m * delta - delta *= 10; - dist *= 10; - if (p2 <= delta) { - break; - } - } - - // V = buffer * 10^-m, with M- <= V <= M+. - - decimal_exponent -= m; - - // 1 ulp in the decimal representation is now 10^-m. - // Since delta and dist are now scaled by 10^m, we need to do the - // same with ulp in order to keep the units in sync. - // - // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e - // - const std::uint64_t ten_m = one.f; - grisu2_round(buffer, length, dist, delta, p2, ten_m); - - // By construction this algorithm generates the shortest possible decimal - // number (Loitsch, Theorem 6.2) which rounds back to w. - // For an input number of precision p, at least - // - // N = 1 + ceil(p * log_10(2)) - // - // decimal digits are sufficient to identify all binary floating-point - // numbers (Matula, "In-and-Out conversions"). - // This implies that the algorithm does not produce more than N decimal - // digits. - // - // N = 17 for p = 53 (IEEE double precision) - // N = 9 for p = 24 (IEEE single precision) -} - -/*! 
-v = buf * 10^decimal_exponent -len is the length of the buffer (number of decimal digits) -The buffer must be large enough, i.e. >= max_digits10. -*/ -inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus, - diyfp v, diyfp m_plus) { - - // --------(-----------------------+-----------------------)-------- (A) - // m- v m+ - // - // --------------------(-----------+-----------------------)-------- (B) - // m- v m+ - // - // First scale v (and m- and m+) such that the exponent is in the range - // [alpha, gamma]. - - const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); - - const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k - - // The exponent of the products is = v.e + c_minus_k.e + q and is in the range - // [alpha,gamma] - const diyfp w = diyfp::mul(v, c_minus_k); - const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); - const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); - - // ----(---+---)---------------(---+---)---------------(---+---)---- - // w- w w+ - // = c*m- = c*v = c*m+ - // - // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and - // w+ are now off by a small amount. - // In fact: - // - // w - v * 10^k < 1 ulp - // - // To account for this inaccuracy, add resp. subtract 1 ulp. - // - // --------+---[---------------(---+---)---------------]---+-------- - // w- M- w M+ w+ - // - // Now any number in [M-, M+] (bounds included) will round to w when input, - // regardless of how the input rounding algorithm breaks ties. - // - // And digit_gen generates the shortest possible such number in [M-, M+]. - // Note that this does not mean that Grisu2 always generates the shortest - // possible number in the interval (m-, m+). - const diyfp M_minus(w_minus.f + 1, w_minus.e); - const diyfp M_plus(w_plus.f - 1, w_plus.e); - - decimal_exponent = -cached.k; // = -(-k) = k - - grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); -} - -/*! 
-v = buf * 10^decimal_exponent -len is the length of the buffer (number of decimal digits) -The buffer must be large enough, i.e. >= max_digits10. -*/ -template -void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { - static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, - "internal error: not enough precision"); - - // If the neighbors (and boundaries) of 'value' are always computed for - // double-precision numbers, all float's can be recovered using strtod (and - // strtof). However, the resulting decimal representations are not exactly - // "short". - // - // The documentation for 'std::to_chars' - // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is - // converted to a string as if by std::sprintf in the default ("C") locale" - // and since sprintf promotes float's to double's, I think this is exactly - // what 'std::to_chars' does. On the other hand, the documentation for - // 'std::to_chars' requires that "parsing the representation using the - // corresponding std::from_chars function recovers value exactly". That - // indicates that single precision floating-point numbers should be recovered - // using 'std::strtof'. - // - // NB: If the neighbors are computed for single-precision numbers, there is a - // single float - // (7.0385307e-26f) which can't be recovered using strtod. The resulting - // double precision value is off by 1 ulp. -#if 0 - const boundaries w = compute_boundaries(static_cast(value)); -#else - const boundaries w = compute_boundaries(value); -#endif - - grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); -} - -/*! -@brief appends a decimal representation of e to buf -@return a pointer to the element following the exponent. -@pre -1000 < e < 1000 -*/ -inline char *append_exponent(char *buf, int e) { - - if (e < 0) { - e = -e; - *buf++ = '-'; - } else { - *buf++ = '+'; - } - - auto k = static_cast(e); - if (k < 10) { - // Always print at least two digits in the exponent. 
- // This is for compatibility with printf("%g"). - *buf++ = '0'; - *buf++ = static_cast('0' + k); - } else if (k < 100) { - *buf++ = static_cast('0' + k / 10); - k %= 10; - *buf++ = static_cast('0' + k); - } else { - *buf++ = static_cast('0' + k / 100); - k %= 100; - *buf++ = static_cast('0' + k / 10); - k %= 10; - *buf++ = static_cast('0' + k); - } - - return buf; -} - -/*! -@brief prettify v = buf * 10^decimal_exponent -If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point -notation. Otherwise it will be printed in exponential notation. -@pre min_exp < 0 -@pre max_exp > 0 -*/ -inline char *format_buffer(char *buf, int len, int decimal_exponent, - int min_exp, int max_exp) { - - const int k = len; - const int n = len + decimal_exponent; - - // v = buf * 10^(n-k) - // k is the length of the buffer (number of decimal digits) - // n is the position of the decimal point relative to the start of the buffer. - - if (k <= n && n <= max_exp) { - // digits[000] - // len <= max_exp + 2 - - std::memset(buf + k, '0', static_cast(n) - static_cast(k)); - // Make it look like a floating-point number (#362, #378) - // buf[n + 0] = '.'; - // buf[n + 1] = '0'; - return buf + (static_cast(n)); - } - - if (0 < n && n <= max_exp) { - // dig.its - // len <= max_digits10 + 1 - std::memmove(buf + (static_cast(n) + 1), buf + n, - static_cast(k) - static_cast(n)); - buf[n] = '.'; - return buf + (static_cast(k) + 1U); - } - - if (min_exp < n && n <= 0) { - // 0.[000]digits - // len <= 2 + (-min_exp - 1) + max_digits10 - - std::memmove(buf + (2 + static_cast(-n)), buf, - static_cast(k)); - buf[0] = '0'; - buf[1] = '.'; - std::memset(buf + 2, '0', static_cast(-n)); - return buf + (2U + static_cast(-n) + static_cast(k)); - } - - if (k == 1) { - // dE+123 - // len <= 1 + 5 - - buf += 1; - } else { - // d.igitsE+123 - // len <= max_digits10 + 1 + 5 - - std::memmove(buf + 2, buf + 1, static_cast(k) - 1); - buf[1] = '.'; - buf += 1 + static_cast(k); - } - - *buf++ = 'e'; 
- return append_exponent(buf, n - 1); -} - -} // namespace dtoa_impl - -/*! -The format of the resulting decimal representation is similar to printf's %g -format. Returns an iterator pointing past-the-end of the decimal representation. -@note The input number must be finite, i.e. NaN's and Inf's are not supported. -@note The buffer must be large enough. -@note The result is NOT null-terminated. -*/ -char *to_chars(char *first, const char *last, double value) { - static_cast(last); // maybe unused - fix warning - bool negative = std::signbit(value); - if (negative) { - value = -value; - *first++ = '-'; - } - - if (value == 0) // +-0 - { - *first++ = '0'; - // Make it look like a floating-point number (#362, #378) - if(negative) { - *first++ = '.'; - *first++ = '0'; - } - return first; - } - // Compute v = buffer * 10^decimal_exponent. - // The decimal digits are stored in the buffer, which needs to be interpreted - // as an unsigned decimal integer. - // len is the length of the buffer, i.e. the number of decimal digits. - int len = 0; - int decimal_exponent = 0; - dtoa_impl::grisu2(first, len, decimal_exponent, value); - // Format the buffer like printf("%.*g", prec, value) - constexpr int kMinExp = -4; - constexpr int kMaxExp = std::numeric_limits::digits10; - - return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, - kMaxExp); -} -} // namespace internal -} // namespace simdjson -/* end file src/to_chars.cpp */ -/* begin file src/from_chars.cpp */ -#include -namespace simdjson { -namespace internal { - -/** - * The code in the internal::from_chars function is meant to handle the floating-point number parsing - * when we have more than 19 digits in the decimal mantissa. This should only be seen - * in adversarial scenarios: we do not expect production systems to even produce - * such floating-point numbers. 
- * - * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) - * who credits Ken Thompson for the design (via a reference to the Go source - * code). See - * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c - * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c - * It is probably not very fast but it is a fallback that should almost never be - * called in real life. Google Wuffs is published under APL 2.0. - **/ - -namespace { -constexpr uint32_t max_digits = 768; -constexpr int32_t decimal_point_range = 2047; -} // namespace - -struct adjusted_mantissa { - uint64_t mantissa; - int power2; - adjusted_mantissa() : mantissa(0), power2(0) {} -}; - -struct decimal { - uint32_t num_digits; - int32_t decimal_point; - bool negative; - bool truncated; - uint8_t digits[max_digits]; -}; - -template struct binary_format { - static constexpr int mantissa_explicit_bits(); - static constexpr int minimum_exponent(); - static constexpr int infinite_power(); - static constexpr int sign_index(); -}; - -template <> constexpr int binary_format::mantissa_explicit_bits() { - return 52; -} - -template <> constexpr int binary_format::minimum_exponent() { - return -1023; -} -template <> constexpr int binary_format::infinite_power() { - return 0x7FF; -} - -template <> constexpr int binary_format::sign_index() { return 63; } - -bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } - -// This should always succeed since it follows a call to parse_number. 
-decimal parse_decimal(const char *&p) noexcept { - decimal answer; - answer.num_digits = 0; - answer.decimal_point = 0; - answer.truncated = false; - answer.negative = (*p == '-'); - if ((*p == '-') || (*p == '+')) { - ++p; - } - - while (*p == '0') { - ++p; - } - while (is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); - } - answer.num_digits++; - ++p; - } - if (*p == '.') { - ++p; - const char *first_after_period = p; - // if we have not yet encountered a zero, we have to skip it as well - if (answer.num_digits == 0) { - // skip zeros - while (*p == '0') { - ++p; - } - } - while (is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); - } - answer.num_digits++; - ++p; - } - answer.decimal_point = int32_t(first_after_period - p); - } - if(answer.num_digits > 0) { - const char *preverse = p - 1; - int32_t trailing_zeros = 0; - while ((*preverse == '0') || (*preverse == '.')) { - if(*preverse == '0') { trailing_zeros++; }; - --preverse; - } - answer.decimal_point += int32_t(answer.num_digits); - answer.num_digits -= uint32_t(trailing_zeros); - } - if(answer.num_digits > max_digits ) { - answer.num_digits = max_digits; - answer.truncated = true; - } - if (('e' == *p) || ('E' == *p)) { - ++p; - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - int32_t exp_number = 0; // exponential part - while (is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000) { - exp_number = 10 * exp_number + digit; - } - ++p; - } - answer.decimal_point += (neg_exp ? -exp_number : exp_number); - } - return answer; -} - -// This should always succeed since it follows a call to parse_number. -// Will not read at or beyond the "end" pointer. 
-decimal parse_decimal(const char *&p, const char * end) noexcept { - decimal answer; - answer.num_digits = 0; - answer.decimal_point = 0; - answer.truncated = false; - if(p == end) { return answer; } // should never happen - answer.negative = (*p == '-'); - if ((*p == '-') || (*p == '+')) { - ++p; - } - - while ((p != end) && (*p == '0')) { - ++p; - } - while ((p != end) && is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); - } - answer.num_digits++; - ++p; - } - if ((p != end) && (*p == '.')) { - ++p; - if(p == end) { return answer; } // should never happen - const char *first_after_period = p; - // if we have not yet encountered a zero, we have to skip it as well - if (answer.num_digits == 0) { - // skip zeros - while (*p == '0') { - ++p; - } - } - while ((p != end) && is_integer(*p)) { - if (answer.num_digits < max_digits) { - answer.digits[answer.num_digits] = uint8_t(*p - '0'); - } - answer.num_digits++; - ++p; - } - answer.decimal_point = int32_t(first_after_period - p); - } - if(answer.num_digits > 0) { - const char *preverse = p - 1; - int32_t trailing_zeros = 0; - while ((*preverse == '0') || (*preverse == '.')) { - if(*preverse == '0') { trailing_zeros++; }; - --preverse; - } - answer.decimal_point += int32_t(answer.num_digits); - answer.num_digits -= uint32_t(trailing_zeros); - } - if(answer.num_digits > max_digits ) { - answer.num_digits = max_digits; - answer.truncated = true; - } - if ((p != end) && (('e' == *p) || ('E' == *p))) { - ++p; - if(p == end) { return answer; } // should never happen - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } else if ('+' == *p) { - ++p; - } - int32_t exp_number = 0; // exponential part - while ((p != end) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000) { - exp_number = 10 * exp_number + digit; - } - ++p; - } - answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); - } - return answer; -} - -namespace { - -// remove all final zeroes -inline void trim(decimal &h) { - while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { - h.num_digits--; - } -} - -uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { - shift &= 63; - const static uint16_t number_of_digits_decimal_left_shift_table[65] = { - 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, - 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, - 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, - 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, - 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, - 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, - 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, - 0x051C, 0x051C, - }; - uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; - uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; - uint32_t num_new_digits = x_a >> 11; - uint32_t pow5_a = 0x7FF & x_a; - uint32_t pow5_b = 0x7FF & x_b; - const static uint8_t - number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { - 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, - 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, - 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, - 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, - 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, - 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, - 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, - 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, - 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, - 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, - 8, 7, 6, 9, 5, 3, 1, 2, 
5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, - 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, - 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, - 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, - 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, - 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, - 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, - 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, - 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, - 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, - 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, - 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, - 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, - 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, - 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, - 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, - 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, - 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, - 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, - 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, - 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, - 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, - 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, - 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, - 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, - 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, - 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, - 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, - 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 
4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, - 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, - 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, - 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, - 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, - 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, - 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, - 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, - 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, - 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, - 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, - 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, - 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, - 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, - 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, - 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, - 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, - 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, - 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, - }; - const uint8_t *pow5 = - &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; - uint32_t i = 0; - uint32_t n = pow5_b - pow5_a; - for (; i < n; i++) { - if (i >= h.num_digits) { - return num_new_digits - 1; - } else if (h.digits[i] == pow5[i]) { - continue; - } else if (h.digits[i] < pow5[i]) { - return num_new_digits - 1; - } else { - return num_new_digits; - } - } - return num_new_digits; -} - -} // end of anonymous namespace - -uint64_t round(decimal &h) { - if ((h.num_digits == 0) || (h.decimal_point < 0)) { - return 0; - } else if (h.decimal_point > 18) { - return UINT64_MAX; - } - // at this point, we know that h.decimal_point >= 0 - uint32_t dp = 
uint32_t(h.decimal_point); - uint64_t n = 0; - for (uint32_t i = 0; i < dp; i++) { - n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); - } - bool round_up = false; - if (dp < h.num_digits) { - round_up = h.digits[dp] >= 5; // normally, we round up - // but we may need to round to even! - if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { - round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); - } - } - if (round_up) { - n++; - } - return n; -} - -// computes h * 2^-shift -void decimal_left_shift(decimal &h, uint32_t shift) { - if (h.num_digits == 0) { - return; - } - uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); - int32_t read_index = int32_t(h.num_digits - 1); - uint32_t write_index = h.num_digits - 1 + num_new_digits; - uint64_t n = 0; - - while (read_index >= 0) { - n += uint64_t(h.digits[read_index]) << shift; - uint64_t quotient = n / 10; - uint64_t remainder = n - (10 * quotient); - if (write_index < max_digits) { - h.digits[write_index] = uint8_t(remainder); - } else if (remainder > 0) { - h.truncated = true; - } - n = quotient; - write_index--; - read_index--; - } - while (n > 0) { - uint64_t quotient = n / 10; - uint64_t remainder = n - (10 * quotient); - if (write_index < max_digits) { - h.digits[write_index] = uint8_t(remainder); - } else if (remainder > 0) { - h.truncated = true; - } - n = quotient; - write_index--; - } - h.num_digits += num_new_digits; - if (h.num_digits > max_digits) { - h.num_digits = max_digits; - } - h.decimal_point += int32_t(num_new_digits); - trim(h); -} - -// computes h * 2^shift -void decimal_right_shift(decimal &h, uint32_t shift) { - uint32_t read_index = 0; - uint32_t write_index = 0; - - uint64_t n = 0; - - while ((n >> shift) == 0) { - if (read_index < h.num_digits) { - n = (10 * n) + h.digits[read_index++]; - } else if (n == 0) { - return; - } else { - while ((n >> shift) == 0) { - n = 10 * n; - read_index++; - } - break; - } - } - h.decimal_point -= int32_t(read_index - 1); 
- if (h.decimal_point < -decimal_point_range) { // it is zero - h.num_digits = 0; - h.decimal_point = 0; - h.negative = false; - h.truncated = false; - return; - } - uint64_t mask = (uint64_t(1) << shift) - 1; - while (read_index < h.num_digits) { - uint8_t new_digit = uint8_t(n >> shift); - n = (10 * (n & mask)) + h.digits[read_index++]; - h.digits[write_index++] = new_digit; - } - while (n > 0) { - uint8_t new_digit = uint8_t(n >> shift); - n = 10 * (n & mask); - if (write_index < max_digits) { - h.digits[write_index++] = new_digit; - } else if (new_digit > 0) { - h.truncated = true; - } - } - h.num_digits = write_index; - trim(h); -} - -template adjusted_mantissa compute_float(decimal &d) { - adjusted_mantissa answer; - if (d.num_digits == 0) { - // should be zero - answer.power2 = 0; - answer.mantissa = 0; - return answer; - } - // At this point, going further, we can assume that d.num_digits > 0. - // We want to guard against excessive decimal point values because - // they can result in long running times. Indeed, we do - // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 - // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not - // fine (runs for a long time). - // - if(d.decimal_point < -324) { - // We have something smaller than 1e-324 which is always zero - // in binary64 and binary32. - // It should be zero. - answer.power2 = 0; - answer.mantissa = 0; - return answer; - } else if(d.decimal_point >= 310) { - // We have something at least as large as 0.1e310 which is - // always infinite. - answer.power2 = binary::infinite_power(); - answer.mantissa = 0; - return answer; - } - - static const uint32_t max_shift = 60; - static const uint32_t num_powers = 19; - static const uint8_t powers[19] = { - 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // - 33, 36, 39, 43, 46, 49, 53, 56, 59, // - }; - int32_t exp2 = 0; - while (d.decimal_point > 0) { - uint32_t n = uint32_t(d.decimal_point); - uint32_t shift = (n < num_powers) ? 
powers[n] : max_shift; - decimal_right_shift(d, shift); - if (d.decimal_point < -decimal_point_range) { - // should be zero - answer.power2 = 0; - answer.mantissa = 0; - return answer; - } - exp2 += int32_t(shift); - } - // We shift left toward [1/2 ... 1]. - while (d.decimal_point <= 0) { - uint32_t shift; - if (d.decimal_point == 0) { - if (d.digits[0] >= 5) { - break; - } - shift = (d.digits[0] < 2) ? 2 : 1; - } else { - uint32_t n = uint32_t(-d.decimal_point); - shift = (n < num_powers) ? powers[n] : max_shift; - } - decimal_left_shift(d, shift); - if (d.decimal_point > decimal_point_range) { - // we want to get infinity: - answer.power2 = 0xFF; - answer.mantissa = 0; - return answer; - } - exp2 -= int32_t(shift); - } - // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. - exp2--; - constexpr int32_t minimum_exponent = binary::minimum_exponent(); - while ((minimum_exponent + 1) > exp2) { - uint32_t n = uint32_t((minimum_exponent + 1) - exp2); - if (n > max_shift) { - n = max_shift; - } - decimal_right_shift(d, n); - exp2 += int32_t(n); - } - if ((exp2 - minimum_exponent) >= binary::infinite_power()) { - answer.power2 = binary::infinite_power(); - answer.mantissa = 0; - return answer; - } - - const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; - decimal_left_shift(d, mantissa_size_in_bits); - - uint64_t mantissa = round(d); - // It is possible that we have an overflow, in which case we need - // to shift back. 
- if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { - decimal_right_shift(d, 1); - exp2 += 1; - mantissa = round(d); - if ((exp2 - minimum_exponent) >= binary::infinite_power()) { - answer.power2 = binary::infinite_power(); - answer.mantissa = 0; - return answer; - } - } - answer.power2 = exp2 - binary::minimum_exponent(); - if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { - answer.power2--; - } - answer.mantissa = - mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); - return answer; -} - -template -adjusted_mantissa parse_long_mantissa(const char *first) { - decimal d = parse_decimal(first); - return compute_float(d); -} - -template -adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { - decimal d = parse_decimal(first, end); - return compute_float(d); -} - -double from_chars(const char *first) noexcept { - bool negative = first[0] == '-'; - if (negative) { - first++; - } - adjusted_mantissa am = parse_long_mantissa>(first); - uint64_t word = am.mantissa; - word |= uint64_t(am.power2) - << binary_format::mantissa_explicit_bits(); - word = negative ? word | (uint64_t(1) << binary_format::sign_index()) - : word; - double value; - std::memcpy(&value, &word, sizeof(double)); - return value; -} - - -double from_chars(const char *first, const char *end) noexcept { - bool negative = first[0] == '-'; - if (negative) { - first++; - } - adjusted_mantissa am = parse_long_mantissa>(first, end); - uint64_t word = am.mantissa; - word |= uint64_t(am.power2) - << binary_format::mantissa_explicit_bits(); - word = negative ? 
word | (uint64_t(1) << binary_format::sign_index()) - : word; - double value; - std::memcpy(&value, &word, sizeof(double)); - return value; -} - -} // internal -} // simdjson -/* end file src/from_chars.cpp */ -/* begin file src/internal/error_tables.cpp */ - -namespace simdjson { -namespace internal { - - SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[] { - { SUCCESS, "No error" }, - { CAPACITY, "This parser can't support a document that big" }, - { MEMALLOC, "Error allocating memory, we're most likely out of memory" }, - { TAPE_ERROR, "The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, - { DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" }, - { STRING_ERROR, "Problem while parsing a string" }, - { T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" }, - { F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" }, - { N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" }, - { NUMBER_ERROR, "Problem while parsing a number" }, - { UTF8_ERROR, "The input is not valid UTF-8" }, - { UNINITIALIZED, "Uninitialized" }, - { EMPTY, "Empty: no JSON found" }, - { UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" }, - { UNCLOSED_STRING, "A string is opened, but never closed." }, - { UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." }, - { INCORRECT_TYPE, "The JSON element does not have the requested type." }, - { NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." }, - { INDEX_OUT_OF_BOUNDS, "Attempted to access an element of a JSON array that is beyond its length." }, - { NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." }, - { IO_ERROR, "Error reading the file." 
}, - { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, - { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, - { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, - { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }, - { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }, - { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, - { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, - { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, - { OUT_OF_BOUNDS, "Attempted to access location outside of document."} - }; // error_messages[] - -} // namespace internal -} // namespace simdjson -/* end file src/internal/error_tables.cpp */ -/* begin file src/internal/jsoncharutils_tables.cpp */ - -namespace simdjson { -namespace internal { - -// structural chars here are -// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) -// we are also interested in the four whitespace characters -// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d - -SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886] = { - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xa, - 0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe, - 0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0x0, 0x10, 0x20, 0x30, 0x40, 0x50, - 0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0, - 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, - 0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0x0, 0x100, 0x200, 0x300, 0x400, 0x500, - 0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00, - 0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00, - 0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, - 0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000, - 0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000, - 0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, - 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; - -} // namespace internal -} // namespace simdjson -/* end file src/internal/jsoncharutils_tables.cpp */ -/* begin file src/internal/numberparsing_tables.cpp */ - -namespace simdjson { -namespace internal { - -// Precomputed powers of ten from 10^0 to 10^22. 
These -// can be represented exactly using the double type. -SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; - -/** - * When mapping numbers from decimal to binary, - * we go from w * 10^q to m * 2^p but we have - * 10^q = 5^q * 2^q, so effectively - * we are trying to match - * w * 2^q * 5^q to m * 2^p. Thus the powers of two - * are not a concern since they can be represented - * exactly using the binary notation, only the powers of five - * affect the binary significand. - */ - - -// The truncated powers of five from 5^-342 all the way to 5^308 -// The mantissa is truncated to 128 bits, and -// never rounded up. Uses about 10KB. -SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]= { - 0xeef453d6923bd65a,0x113faa2906a13b3f, - 0x9558b4661b6565f8,0x4ac7ca59a424c507, - 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, - 0xe95a99df8ace6f53,0xf4d82c2c107973dc, - 0x91d8a02bb6c10594,0x79071b9b8a4be869, - 0xb64ec836a47146f9,0x9748e2826cdee284, - 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, - 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, - 0xb208ef855c969f4f,0xbdbd2d335e51a935, - 0xde8b2b66b3bc4723,0xad2c788035e61382, - 0x8b16fb203055ac76,0x4c3bcb5021afcc31, - 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, - 0xd953e8624b85dd78,0xd71d6dad34a2af0d, - 0x87d4713d6f33aa6b,0x8672648c40e5ad68, - 0xa9c98d8ccb009506,0x680efdaf511f18c2, - 0xd43bf0effdc0ba48,0x212bd1b2566def2, - 0x84a57695fe98746d,0x14bb630f7604b57, - 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, - 0xcf42894a5dce35ea,0x52064cac828675b9, - 0x818995ce7aa0e1b2,0x7343efebd1940993, - 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, - 0xca66fa129f9b60a6,0xd41a26e077774ef6, - 0xfd00b897478238d0,0x8920b098955522b4, - 0x9e20735e8cb16382,0x55b46e5f5d5535b0, - 0xc5a890362fddbc62,0xeb2189f734aa831d, - 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, - 0x9a6bb0aa55653b2d,0x47b233c92125366e, - 0xc1069cd4eabe89f8,0x999ec0bb696e840a, - 
0xf148440a256e2c76,0xc00670ea43ca250d, - 0x96cd2a865764dbca,0x380406926a5e5728, - 0xbc807527ed3e12bc,0xc605083704f5ecf2, - 0xeba09271e88d976b,0xf7864a44c633682e, - 0x93445b8731587ea3,0x7ab3ee6afbe0211d, - 0xb8157268fdae9e4c,0x5960ea05bad82964, - 0xe61acf033d1a45df,0x6fb92487298e33bd, - 0x8fd0c16206306bab,0xa5d3b6d479f8e056, - 0xb3c4f1ba87bc8696,0x8f48a4899877186c, - 0xe0b62e2929aba83c,0x331acdabfe94de87, - 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, - 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, - 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, - 0x892731ac9faf056e,0xbe311c083a225cd2, - 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, - 0xd64d3d9db981787d,0x92cbbccdad5b108, - 0x85f0468293f0eb4e,0x25bbf56008c58ea5, - 0xa76c582338ed2621,0xaf2af2b80af6f24e, - 0xd1476e2c07286faa,0x1af5af660db4aee1, - 0x82cca4db847945ca,0x50d98d9fc890ed4d, - 0xa37fce126597973c,0xe50ff107bab528a0, - 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, - 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, - 0x9faacf3df73609b1,0x77b191618c54e9ac, - 0xc795830d75038c1d,0xd59df5b9ef6a2417, - 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, - 0x9becce62836ac577,0x4ee367f9430aec32, - 0xc2e801fb244576d5,0x229c41f793cda73f, - 0xf3a20279ed56d48a,0x6b43527578c1110f, - 0x9845418c345644d6,0x830a13896b78aaa9, - 0xbe5691ef416bd60c,0x23cc986bc656d553, - 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, - 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, - 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, - 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, - 0x91376c36d99995be,0x23100809b9c21fa1, - 0xb58547448ffffb2d,0xabd40a0c2832a78a, - 0xe2e69915b3fff9f9,0x16c90c8f323f516c, - 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, - 0xb1442798f49ffb4a,0x99cd11cfdf41779c, - 0xdd95317f31c7fa1d,0x40405643d711d583, - 0x8a7d3eef7f1cfc52,0x482835ea666b2572, - 0xad1c8eab5ee43b66,0xda3243650005eecf, - 0xd863b256369d4a40,0x90bed43e40076a82, - 0x873e4f75e2224e68,0x5a7744a6e804a291, - 0xa90de3535aaae202,0x711515d0a205cb36, - 0xd3515c2831559a83,0xd5a5b44ca873e03, - 0x8412d9991ed58091,0xe858790afe9486c2, - 
0xa5178fff668ae0b6,0x626e974dbe39a872, - 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, - 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, - 0xa139029f6a239f72,0x1c1fffc1ebc44e80, - 0xc987434744ac874e,0xa327ffb266b56220, - 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, - 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, - 0xc4ce17b399107c22,0xcb550fb4384d21d3, - 0xf6019da07f549b2b,0x7e2a53a146606a48, - 0x99c102844f94e0fb,0x2eda7444cbfc426d, - 0xc0314325637a1939,0xfa911155fefb5308, - 0xf03d93eebc589f88,0x793555ab7eba27ca, - 0x96267c7535b763b5,0x4bc1558b2f3458de, - 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, - 0xea9c227723ee8bcb,0x465e15a979c1cadc, - 0x92a1958a7675175f,0xbfacd89ec191ec9, - 0xb749faed14125d36,0xcef980ec671f667b, - 0xe51c79a85916f484,0x82b7e12780e7401a, - 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, - 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, - 0xdfbdcece67006ac9,0x67a791e093e1d49a, - 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, - 0xaecc49914078536d,0x58fae9f773886e18, - 0xda7f5bf590966848,0xaf39a475506a899e, - 0x888f99797a5e012d,0x6d8406c952429603, - 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, - 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, - 0x855c3be0a17fcd26,0x5cf2eea09a55067f, - 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, - 0xd0601d8efc57b08b,0xf13b94daf124da26, - 0x823c12795db6ce57,0x76c53d08d6b70858, - 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, - 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, - 0xfe5d54150b090b02,0xd3f93b35435d7c4c, - 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, - 0xc6b8e9b0709f109a,0x359ab6419ca1091b, - 0xf867241c8cc6d4c0,0xc30163d203c94b62, - 0x9b407691d7fc44f8,0x79e0de63425dcf1d, - 0xc21094364dfb5636,0x985915fc12f542e4, - 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, - 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, - 0xbd8430bd08277231,0x50c6ff782a838353, - 0xece53cec4a314ebd,0xa4f8bf5635246428, - 0x940f4613ae5ed136,0x871b7795e136be99, - 0xb913179899f68584,0x28e2557b59846e3f, - 0xe757dd7ec07426e5,0x331aeada2fe589cf, - 0x9096ea6f3848984f,0x3ff0d2c85def7621, - 0xb4bca50b065abe63,0xfed077a756b53a9, - 
0xe1ebce4dc7f16dfb,0xd3e8495912c62894, - 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, - 0xb080392cc4349dec,0xbd8d794d96aacfb3, - 0xdca04777f541c567,0xecf0d7a0fc5583a0, - 0x89e42caaf9491b60,0xf41686c49db57244, - 0xac5d37d5b79b6239,0x311c2875c522ced5, - 0xd77485cb25823ac7,0x7d633293366b828b, - 0x86a8d39ef77164bc,0xae5dff9c02033197, - 0xa8530886b54dbdeb,0xd9f57f830283fdfc, - 0xd267caa862a12d66,0xd072df63c324fd7b, - 0x8380dea93da4bc60,0x4247cb9e59f71e6d, - 0xa46116538d0deb78,0x52d9be85f074e608, - 0xcd795be870516656,0x67902e276c921f8b, - 0x806bd9714632dff6,0xba1cd8a3db53b6, - 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, - 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, - 0xfad2a4b13d1b5d6c,0x796b805720085f81, - 0x9cc3a6eec6311a63,0xcbe3303674053bb0, - 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, - 0xf4f1b4d515acb93b,0xee92fb5515482d44, - 0x991711052d8bf3c5,0x751bdd152d4d1c4a, - 0xbf5cd54678eef0b6,0xd262d45a78a0635d, - 0xef340a98172aace4,0x86fb897116c87c34, - 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, - 0xbae0a846d2195712,0x8974836059cca109, - 0xe998d258869facd7,0x2bd1a438703fc94b, - 0x91ff83775423cc06,0x7b6306a34627ddcf, - 0xb67f6455292cbf08,0x1a3bc84c17b1d542, - 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, - 0x8e938662882af53e,0x547eb47b7282ee9c, - 0xb23867fb2a35b28d,0xe99e619a4f23aa43, - 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, - 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, - 0xae0b158b4738705e,0x9624ab50b148d445, - 0xd98ddaee19068c76,0x3badd624dd9b0957, - 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, - 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, - 0xd47487cc8470652b,0x7647c3200069671f, - 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, - 0xa5fb0a17c777cf09,0xf468107100525890, - 0xcf79cc9db955c2cc,0x7182148d4066eeb4, - 0x81ac1fe293d599bf,0xc6f14cd848405530, - 0xa21727db38cb002f,0xb8ada00e5a506a7c, - 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, - 0xfd442e4688bd304a,0x908f4a166d1da663, - 0x9e4a9cec15763e2e,0x9a598e4e043287fe, - 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, - 0xf7549530e188c128,0xd12bee59e68ef47c, - 
0x9a94dd3e8cf578b9,0x82bb74f8301958ce, - 0xc13a148e3032d6e7,0xe36a52363c1faf01, - 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, - 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, - 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, - 0xebdf661791d60f56,0x111b495b3464ad21, - 0x936b9fcebb25c995,0xcab10dd900beec34, - 0xb84687c269ef3bfb,0x3d5d514f40eea742, - 0xe65829b3046b0afa,0xcb4a5a3112a5112, - 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, - 0xb3f4e093db73a093,0x59ed216765690f56, - 0xe0f218b8d25088b8,0x306869c13ec3532c, - 0x8c974f7383725573,0x1e414218c73a13fb, - 0xafbd2350644eeacf,0xe5d1929ef90898fa, - 0xdbac6c247d62a583,0xdf45f746b74abf39, - 0x894bc396ce5da772,0x6b8bba8c328eb783, - 0xab9eb47c81f5114f,0x66ea92f3f326564, - 0xd686619ba27255a2,0xc80a537b0efefebd, - 0x8613fd0145877585,0xbd06742ce95f5f36, - 0xa798fc4196e952e7,0x2c48113823b73704, - 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, - 0x82ef85133de648c4,0x9a984d73dbe722fb, - 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, - 0xcc963fee10b7d1b3,0x318df905079926a8, - 0xffbbcfe994e5c61f,0xfdf17746497f7052, - 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, - 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, - 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, - 0x9c1661a651213e2d,0x6bea10ca65c084e, - 0xc31bfa0fe5698db8,0x486e494fcff30a62, - 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, - 0x986ddb5c6b3a76b7,0xf89629465a75e01c, - 0xbe89523386091465,0xf6bbb397f1135823, - 0xee2ba6c0678b597f,0x746aa07ded582e2c, - 0x94db483840b717ef,0xa8c2a44eb4571cdc, - 0xba121a4650e4ddeb,0x92f34d62616ce413, - 0xe896a0d7e51e1566,0x77b020baf9c81d17, - 0x915e2486ef32cd60,0xace1474dc1d122e, - 0xb5b5ada8aaff80b8,0xd819992132456ba, - 0xe3231912d5bf60e6,0x10e1fff697ed6c69, - 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, - 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, - 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, - 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, - 0xad4ab7112eb3929d,0x86c16c98d2c953c6, - 0xd89d64d57a607744,0xe871c7bf077ba8b7, - 0x87625f056c7c4a8b,0x11471cd764ad4972, - 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, - 
0xd389b47879823479,0x4aff1d108d4ec2c3, - 0x843610cb4bf160cb,0xcedf722a585139ba, - 0xa54394fe1eedb8fe,0xc2974eb4ee658828, - 0xce947a3da6a9273e,0x733d226229feea32, - 0x811ccc668829b887,0x806357d5a3f525f, - 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, - 0xc9bcff6034c13052,0xfc89b393dd02f0b5, - 0xfc2c3f3841f17c67,0xbbac2078d443ace2, - 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, - 0xc5029163f384a931,0xa9e795e65d4df11, - 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, - 0x99ea0196163fa42e,0x504bced1bf8e4e45, - 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, - 0xf07da27a82c37088,0x5d767327bb4e5a4c, - 0x964e858c91ba2655,0x3a6a07f8d510f86f, - 0xbbe226efb628afea,0x890489f70a55368b, - 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, - 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, - 0xb77ada0617e3bbcb,0x9ce6ebb40173744, - 0xe55990879ddcaabd,0xcc420a6a101d0515, - 0x8f57fa54c2a9eab6,0x9fa946824a12232d, - 0xb32df8e9f3546564,0x47939822dc96abf9, - 0xdff9772470297ebd,0x59787e2b93bc56f7, - 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, - 0xaefae51477a06b03,0xede622920b6b23f1, - 0xdab99e59958885c4,0xe95fab368e45eced, - 0x88b402f7fd75539b,0x11dbcb0218ebb414, - 0xaae103b5fcd2a881,0xd652bdc29f26a119, - 0xd59944a37c0752a2,0x4be76d3346f0495f, - 0x857fcae62d8493a5,0x6f70a4400c562ddb, - 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, - 0xd097ad07a71f26b2,0x7e2000a41346a7a7, - 0x825ecc24c873782f,0x8ed400668c0c28c8, - 0xa2f67f2dfa90563b,0x728900802f0f32fa, - 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, - 0xfea126b7d78186bc,0xe2f610c84987bfa8, - 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, - 0xc6ede63fa05d3143,0x91503d1c79720dbb, - 0xf8a95fcf88747d94,0x75a44c6397ce912a, - 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, - 0xc24452da229b021b,0xfbe85badce996168, - 0xf2d56790ab41c2a2,0xfae27299423fb9c3, - 0x97c560ba6b0919a5,0xdccd879fc967d41a, - 0xbdb6b8e905cb600f,0x5400e987bbc1c920, - 0xed246723473e3813,0x290123e9aab23b68, - 0x9436c0760c86e30b,0xf9a0b6720aaf6521, - 0xb94470938fa89bce,0xf808e40e8d5b3e69, - 0xe7958cb87392c2c2,0xb60b1d1230b20e04, - 
0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, - 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, - 0xe2280b6c20dd5232,0x25c6da63c38de1b0, - 0x8d590723948a535f,0x579c487e5a38ad0e, - 0xb0af48ec79ace837,0x2d835a9df0c6d851, - 0xdcdb1b2798182244,0xf8e431456cf88e65, - 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, - 0xac8b2d36eed2dac5,0xe272467e3d222f3f, - 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, - 0x86ccbb52ea94baea,0x98e947129fc2b4e9, - 0xa87fea27a539e9a5,0x3f2398d747b36224, - 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, - 0x83a3eeeef9153e89,0x1953cf68300424ac, - 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, - 0xcdb02555653131b6,0x3792f412cb06794d, - 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, - 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, - 0xc8de047564d20a8b,0xf245825a5a445275, - 0xfb158592be068d2e,0xeed6e2f0f0d56712, - 0x9ced737bb6c4183d,0x55464dd69685606b, - 0xc428d05aa4751e4c,0xaa97e14c3c26b886, - 0xf53304714d9265df,0xd53dd99f4b3066a8, - 0x993fe2c6d07b7fab,0xe546a8038efe4029, - 0xbf8fdb78849a5f96,0xde98520472bdd033, - 0xef73d256a5c0f77c,0x963e66858f6d4440, - 0x95a8637627989aad,0xdde7001379a44aa8, - 0xbb127c53b17ec159,0x5560c018580d5d52, - 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, - 0x9226712162ab070d,0xcab3961304ca70e8, - 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, - 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, - 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, - 0xb267ed1940f1c61c,0x55f038b237591ed3, - 0xdf01e85f912e37a3,0x6b6c46dec52f6688, - 0x8b61313bbabce2c6,0x2323ac4b3b3da015, - 0xae397d8aa96c1b77,0xabec975e0a0d081a, - 0xd9c7dced53c72255,0x96e7bd358c904a21, - 0x881cea14545c7575,0x7e50d64177da2e54, - 0xaa242499697392d2,0xdde50bd1d5d0b9e9, - 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, - 0x84ec3c97da624ab4,0xbd5af13bef0b113e, - 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, - 0xcfb11ead453994ba,0x67de18eda5814af2, - 0x81ceb32c4b43fcf4,0x80eacf948770ced7, - 0xa2425ff75e14fc31,0xa1258379a94d028d, - 0xcad2f7f5359a3b3e,0x96ee45813a04330, - 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, - 0x9e74d1b791e07e48,0x775ea264cf55347e, - 
0xc612062576589dda,0x95364afe032a81a0, - 0xf79687aed3eec551,0x3a83ddbd83f52210, - 0x9abe14cd44753b52,0xc4926a9672793580, - 0xc16d9a0095928a27,0x75b7053c0f178400, - 0xf1c90080baf72cb1,0x5324c68b12dd6800, - 0x971da05074da7bee,0xd3f6fc16ebca8000, - 0xbce5086492111aea,0x88f4bb1ca6bd0000, - 0xec1e4a7db69561a5,0x2b31e9e3d0700000, - 0x9392ee8e921d5d07,0x3aff322e62600000, - 0xb877aa3236a4b449,0x9befeb9fad487c3, - 0xe69594bec44de15b,0x4c2ebe687989a9b4, - 0x901d7cf73ab0acd9,0xf9d37014bf60a11, - 0xb424dc35095cd80f,0x538484c19ef38c95, - 0xe12e13424bb40e13,0x2865a5f206b06fba, - 0x8cbccc096f5088cb,0xf93f87b7442e45d4, - 0xafebff0bcb24aafe,0xf78f69a51539d749, - 0xdbe6fecebdedd5be,0xb573440e5a884d1c, - 0x89705f4136b4a597,0x31680a88f8953031, - 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, - 0xd6bf94d5e57a42bc,0x3d32907604691b4d, - 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, - 0xa7c5ac471b478423,0xfcf80dc33721d54, - 0xd1b71758e219652b,0xd3c36113404ea4a9, - 0x83126e978d4fdf3b,0x645a1cac083126ea, - 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, - 0xcccccccccccccccc,0xcccccccccccccccd, - 0x8000000000000000,0x0, - 0xa000000000000000,0x0, - 0xc800000000000000,0x0, - 0xfa00000000000000,0x0, - 0x9c40000000000000,0x0, - 0xc350000000000000,0x0, - 0xf424000000000000,0x0, - 0x9896800000000000,0x0, - 0xbebc200000000000,0x0, - 0xee6b280000000000,0x0, - 0x9502f90000000000,0x0, - 0xba43b74000000000,0x0, - 0xe8d4a51000000000,0x0, - 0x9184e72a00000000,0x0, - 0xb5e620f480000000,0x0, - 0xe35fa931a0000000,0x0, - 0x8e1bc9bf04000000,0x0, - 0xb1a2bc2ec5000000,0x0, - 0xde0b6b3a76400000,0x0, - 0x8ac7230489e80000,0x0, - 0xad78ebc5ac620000,0x0, - 0xd8d726b7177a8000,0x0, - 0x878678326eac9000,0x0, - 0xa968163f0a57b400,0x0, - 0xd3c21bcecceda100,0x0, - 0x84595161401484a0,0x0, - 0xa56fa5b99019a5c8,0x0, - 0xcecb8f27f4200f3a,0x0, - 0x813f3978f8940984,0x4000000000000000, - 0xa18f07d736b90be5,0x5000000000000000, - 0xc9f2c9cd04674ede,0xa400000000000000, - 0xfc6f7c4045812296,0x4d00000000000000, - 0x9dc5ada82b70b59d,0xf020000000000000, - 
0xc5371912364ce305,0x6c28000000000000, - 0xf684df56c3e01bc6,0xc732000000000000, - 0x9a130b963a6c115c,0x3c7f400000000000, - 0xc097ce7bc90715b3,0x4b9f100000000000, - 0xf0bdc21abb48db20,0x1e86d40000000000, - 0x96769950b50d88f4,0x1314448000000000, - 0xbc143fa4e250eb31,0x17d955a000000000, - 0xeb194f8e1ae525fd,0x5dcfab0800000000, - 0x92efd1b8d0cf37be,0x5aa1cae500000000, - 0xb7abc627050305ad,0xf14a3d9e40000000, - 0xe596b7b0c643c719,0x6d9ccd05d0000000, - 0x8f7e32ce7bea5c6f,0xe4820023a2000000, - 0xb35dbf821ae4f38b,0xdda2802c8a800000, - 0xe0352f62a19e306e,0xd50b2037ad200000, - 0x8c213d9da502de45,0x4526f422cc340000, - 0xaf298d050e4395d6,0x9670b12b7f410000, - 0xdaf3f04651d47b4c,0x3c0cdd765f114000, - 0x88d8762bf324cd0f,0xa5880a69fb6ac800, - 0xab0e93b6efee0053,0x8eea0d047a457a00, - 0xd5d238a4abe98068,0x72a4904598d6d880, - 0x85a36366eb71f041,0x47a6da2b7f864750, - 0xa70c3c40a64e6c51,0x999090b65f67d924, - 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, - 0x82818f1281ed449f,0xbff8f10e7a8921a4, - 0xa321f2d7226895c7,0xaff72d52192b6a0d, - 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, - 0xfee50b7025c36a08,0x2f236d04753d5b4, - 0x9f4f2726179a2245,0x1d762422c946590, - 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, - 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, - 0x9b934c3b330c8577,0x63cc55f49f88eb2f, - 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, - 0xf316271c7fc3908a,0x8bef464e3945ef7a, - 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, - 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, - 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, - 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, - 0xb975d6b6ee39e436,0xb3e2fd538e122b44, - 0xe7d34c64a9c85d44,0x60dbbca87196b616, - 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, - 0xb51d13aea4a488dd,0x6babab6398bdbe41, - 0xe264589a4dcdab14,0xc696963c7eed2dd1, - 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, - 0xb0de65388cc8ada8,0x3b25a55f43294bcb, - 0xdd15fe86affad912,0x49ef0eb713f39ebe, - 0x8a2dbf142dfcc7ab,0x6e3569326c784337, - 0xacb92ed9397bf996,0x49c2c37f07965404, - 0xd7e77a8f87daf7fb,0xdc33745ec97be906, - 
0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, - 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, - 0xd2d80db02aabd62b,0xf50a3fa490c30190, - 0x83c7088e1aab65db,0x792667c6da79e0fa, - 0xa4b8cab1a1563f52,0x577001b891185938, - 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, - 0x80b05e5ac60b6178,0x544f8158315b05b4, - 0xa0dc75f1778e39d6,0x696361ae3db1c721, - 0xc913936dd571c84c,0x3bc3a19cd1e38e9, - 0xfb5878494ace3a5f,0x4ab48a04065c723, - 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, - 0xc45d1df942711d9a,0x3ba5d0bd324f8394, - 0xf5746577930d6500,0xca8f44ec7ee36479, - 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, - 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, - 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, - 0x95d04aee3b80ece5,0xbba1f1d158724a12, - 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, - 0xea1575143cf97226,0xf52d09d71a3293bd, - 0x924d692ca61be758,0x593c2626705f9c56, - 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, - 0xe498f455c38b997a,0xb6dfb9c0f956447, - 0x8edf98b59a373fec,0x4724bd4189bd5eac, - 0xb2977ee300c50fe7,0x58edec91ec2cb657, - 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, - 0x8b865b215899f46c,0xbd79e0d20082ee74, - 0xae67f1e9aec07187,0xecd8590680a3aa11, - 0xda01ee641a708de9,0xe80e6f4820cc9495, - 0x884134fe908658b2,0x3109058d147fdcdd, - 0xaa51823e34a7eede,0xbd4b46f0599fd415, - 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, - 0x850fadc09923329e,0x3e2cf6bc604ddb0, - 0xa6539930bf6bff45,0x84db8346b786151c, - 0xcfe87f7cef46ff16,0xe612641865679a63, - 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, - 0xa26da3999aef7749,0xe3be5e330f38f09d, - 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, - 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, - 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, - 0xc646d63501a1511d,0xb281e1fd541501b8, - 0xf7d88bc24209a565,0x1f225a7ca91a4226, - 0x9ae757596946075f,0x3375788de9b06958, - 0xc1a12d2fc3978937,0x52d6b1641c83ae, - 0xf209787bb47d6b84,0xc0678c5dbd23a49a, - 0x9745eb4d50ce6332,0xf840b7ba963646e0, - 0xbd176620a501fbff,0xb650e5a93bc3d898, - 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, - 0x93ba47c980e98cdf,0xc66f336c36b10137, - 
0xb8a8d9bbe123f017,0xb80b0047445d4184, - 0xe6d3102ad96cec1d,0xa60dc059157491e5, - 0x9043ea1ac7e41392,0x87c89837ad68db2f, - 0xb454e4a179dd1877,0x29babe4598c311fb, - 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, - 0x8ce2529e2734bb1d,0x1899e4a65f58660c, - 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, - 0xdc21a1171d42645d,0x76707543f4fa1f73, - 0x899504ae72497eba,0x6a06494a791c53a8, - 0xabfa45da0edbde69,0x487db9d17636892, - 0xd6f8d7509292d603,0x45a9d2845d3c42b6, - 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, - 0xa7f26836f282b732,0x8e6cac7768d7141e, - 0xd1ef0244af2364ff,0x3207d795430cd926, - 0x8335616aed761f1f,0x7f44e6bd49e807b8, - 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, - 0xcd036837130890a1,0x36dba887c37a8c0f, - 0x802221226be55a64,0xc2494954da2c9789, - 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, - 0xc83553c5c8965d3d,0x6f92829494e5acc7, - 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, - 0x9c69a97284b578d7,0xff2a760414536efb, - 0xc38413cf25e2d70d,0xfef5138519684aba, - 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, - 0x98bf2f79d5993802,0xef2f773ffbd97a61, - 0xbeeefb584aff8603,0xaafb550ffacfd8fa, - 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, - 0x952ab45cfa97a0b2,0xdd945a747bf26183, - 0xba756174393d88df,0x94f971119aeef9e4, - 0xe912b9d1478ceb17,0x7a37cd5601aab85d, - 0x91abb422ccb812ee,0xac62e055c10ab33a, - 0xb616a12b7fe617aa,0x577b986b314d6009, - 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, - 0x8e41ade9fbebc27d,0x14588f13be847307, - 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, - 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, - 0x8aec23d680043bee,0x25de7bb9480d5854, - 0xada72ccc20054ae9,0xaf561aa79a10ae6a, - 0xd910f7ff28069da4,0x1b2ba1518094da04, - 0x87aa9aff79042286,0x90fb44d2f05d0842, - 0xa99541bf57452b28,0x353a1607ac744a53, - 0xd3fa922f2d1675f2,0x42889b8997915ce8, - 0x847c9b5d7c2e09b7,0x69956135febada11, - 0xa59bc234db398c25,0x43fab9837e699095, - 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, - 0x8161afb94b44f57d,0x1d1be0eebac278f5, - 0xa1ba1ba79e1632dc,0x6462d92a69731732, - 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, - 
0xfcb2cb35e702af78,0x5cda735244c3d43e, - 0x9defbf01b061adab,0x3a0888136afa64a7, - 0xc56baec21c7a1916,0x88aaa1845b8fdd0, - 0xf6c69a72a3989f5b,0x8aad549e57273d45, - 0x9a3c2087a63f6399,0x36ac54e2f678864b, - 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, - 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, - 0x969eb7c47859e743,0x9f644ae5a4b1b325, - 0xbc4665b596706114,0x873d5d9f0dde1fee, - 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, - 0x9316ff75dd87cbd8,0x9a7f12442d588f2, - 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, - 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, - 0x8fa475791a569d10,0xf96e017d694487bc, - 0xb38d92d760ec4455,0x37c981dcc395a9ac, - 0xe070f78d3927556a,0x85bbe253f47b1417, - 0x8c469ab843b89562,0x93956d7478ccec8e, - 0xaf58416654a6babb,0x387ac8d1970027b2, - 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, - 0x88fcf317f22241e2,0x441fece3bdf81f03, - 0xab3c2fddeeaad25a,0xd527e81cad7626c3, - 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, - 0x85c7056562757456,0xf6872d5667844e49, - 0xa738c6bebb12d16c,0xb428f8ac016561db, - 0xd106f86e69d785c7,0xe13336d701beba52, - 0x82a45b450226b39c,0xecc0024661173473, - 0xa34d721642b06084,0x27f002d7f95d0190, - 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, - 0xff290242c83396ce,0x7e67047175a15271, - 0x9f79a169bd203e41,0xf0062c6e984d386, - 0xc75809c42c684dd1,0x52c07b78a3e60868, - 0xf92e0c3537826145,0xa7709a56ccdf8a82, - 0x9bbcc7a142b17ccb,0x88a66076400bb691, - 0xc2abf989935ddbfe,0x6acff893d00ea435, - 0xf356f7ebf83552fe,0x583f6b8c4124d43, - 0x98165af37b2153de,0xc3727a337a8b704a, - 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, - 0xeda2ee1c7064130c,0x1162def06f79df73, - 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, - 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, - 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, - 0x910ab1d4db9914a0,0x1d9c9892400a22a2, - 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, - 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, - 0x8da471a9de737e24,0x5ceaecfed289e5d2, - 0xb10d8e1456105dad,0x7425a83e872c5f47, - 0xdd50f1996b947518,0xd12f124e28f77719, - 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, - 
0xace73cbfdc0bfb7b,0x636cc64d1001550b, - 0xd8210befd30efa5a,0x3c47f7e05401aa4e, - 0x8714a775e3e95c78,0x65acfaec34810a71, - 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, - 0xd31045a8341ca07c,0x1ede48111209a050, - 0x83ea2b892091e44d,0x934aed0aab460432, - 0xa4e4b66b68b65d60,0xf81da84d5617853f, - 0xce1de40642e3f4b9,0x36251260ab9d668e, - 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, - 0xa1075a24e4421730,0xb24cf65b8612f81f, - 0xc94930ae1d529cfc,0xdee033f26797b627, - 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, - 0x9d412e0806e88aa5,0x8e1f289560ee864e, - 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, - 0xf5b5d7ec8acb58a2,0xae10af696774b1db, - 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, - 0xbff610b0cc6edd3f,0x17fd090a58d32af3, - 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, - 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, - 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, - 0xea53df5fd18d5513,0x84c86189216dc5ed, - 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, - 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, - 0xe4d5e82392a40515,0xfabaf3feaa5334a, - 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, - 0xb2c71d5bca9023f8,0x743e20e9ef511012, - 0xdf78e4b2bd342cf6,0x914da9246b255416, - 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, - 0xae9672aba3d0c320,0xa184ac2473b529b1, - 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, - 0x8865899617fb1871,0x7e2fa67c7a658892, - 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, - 0xd51ea6fa85785631,0x552a74227f3ea565, - 0x8533285c936b35de,0xd53a88958f87275f, - 0xa67ff273b8460356,0x8a892abaf368f137, - 0xd01fef10a657842c,0x2d2b7569b0432d85, - 0x8213f56a67f6b29b,0x9c3b29620e29fc73, - 0xa298f2c501f45f42,0x8349f3ba91b47b8f, - 0xcb3f2f7642717713,0x241c70a936219a73, - 0xfe0efb53d30dd4d7,0xed238cd383aa0110, - 0x9ec95d1463e8a506,0xf4363804324a40aa, - 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, - 0xf81aa16fdc1b81da,0xdd94b7868e94050a, - 0x9b10a4e5e9913128,0xca7cf2b4191c8326, - 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, - 0xf24a01a73cf2dccf,0xbc633b39673c8cec, - 0x976e41088617ca01,0xd5be0503e085d813, - 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, - 
0xec9c459d51852ba2,0xddf8e7d60ed1219e, - 0x93e1ab8252f33b45,0xcabb90e5c942b503, - 0xb8da1662e7b00a17,0x3d6a751f3b936243, - 0xe7109bfba19c0c9d,0xcc512670a783ad4, - 0x906a617d450187e2,0x27fb2b80668b24c5, - 0xb484f9dc9641e9da,0xb1f9f660802dedf6, - 0xe1a63853bbd26451,0x5e7873f8a0396973, - 0x8d07e33455637eb2,0xdb0b487b6423e1e8, - 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, - 0xdc5c5301c56b75f7,0x7641a140cc7810fb, - 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, - 0xac2820d9623bf429,0x546345fa9fbdcd44, - 0xd732290fbacaf133,0xa97c177947ad4095, - 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, - 0xa81f301449ee8c70,0x5c68f256bfff5a74, - 0xd226fc195c6a2f8c,0x73832eec6fff3111, - 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, - 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, - 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, - 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, - 0xa0555e361951c366,0xd7e105bcc332621f, - 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, - 0xfa856334878fc150,0xb14f98f6f0feb951, - 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, - 0xc3b8358109e84f07,0xa862f80ec4700c8, - 0xf4a642e14c6262c8,0xcd27bb612758c0fa, - 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, - 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, - 0xeeea5d5004981478,0x1858ccfce06cac74, - 0x95527a5202df0ccb,0xf37801e0c43ebc8, - 0xbaa718e68396cffd,0xd30560258f54e6ba, - 0xe950df20247c83fd,0x47c6b82ef32a2069, - 0x91d28b7416cdd27e,0x4cdc331d57fa5441, - 0xb6472e511c81471d,0xe0133fe4adf8e952, - 0xe3d8f9e563a198e5,0x58180fddd97723a6, - 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; - -} // namespace internal -} // namespace simdjson -/* end file src/internal/numberparsing_tables.cpp */ -/* begin file src/internal/simdprune_tables.cpp */ -#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 - -#include - -namespace simdjson { // table modified and copied from -namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable -SIMDJSON_DLLIMPORTEXPORT 
const unsigned char BitsSetTable256mul2[256] = { - 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, - 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, - 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, - 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, - 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, - 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, - 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, - 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, - 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, - 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, - 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, - 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, - 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, - 14, 10, 12, 12, 14, 12, 14, 14, 16}; - -SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, - 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, - 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, - 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -}; - -// 256 * 8 bytes = 2kB, easily fits in cache. 
-SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { - 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, - 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, - 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, - 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, - 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, - 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, - 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, - 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, - 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, - 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, - 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, - 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, - 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, - 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, - 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, - 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, - 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, - 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, - 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, - 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, - 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, - 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, - 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, - 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, - 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, - 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, - 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, - 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, - 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, - 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, - 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, - 
0x0000000000070501, 0x0000000000070500, 0x0000000000000705, - 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, - 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, - 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, - 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, - 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, - 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, - 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, - 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, - 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, - 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, - 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, - 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, - 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, - 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, - 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, - 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, - 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, - 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, - 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, - 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, - 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, - 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, - 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, - 0x0000000006040301, 0x0000000006040300, 0x0000000000060403, - 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, - 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, - 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, - 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, - 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, - 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, - 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, - 
0x0000000000000601, 0x0000000000000600, 0x0000000000000006, - 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, - 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, - 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, - 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, - 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, - 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, - 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, - 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, - 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, - 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, - 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, - 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, - 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, - 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, - 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, - 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, - 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, - 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, - 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, - 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, - 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, - 0x0000000000000000, -}; //static uint64_t thintable_epi8[256] - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 -/* end file src/internal/simdprune_tables.cpp */ -/* begin file src/implementation.cpp */ -#include - -namespace simdjson { - -bool implementation::supported_by_runtime_system() const { - uint32_t required_instruction_sets = this->required_instruction_sets(); - uint32_t supported_instruction_sets = 
internal::detect_supported_architectures(); - return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets); -} - -namespace internal { - -// Static array of known implementations. We're hoping these get baked into the executable -// without requiring a static initializer. - -#if SIMDJSON_IMPLEMENTATION_ICELAKE -static const icelake::implementation* get_icelake_singleton() { - static const icelake::implementation icelake_singleton{}; - return &icelake_singleton; -} -#endif -#if SIMDJSON_IMPLEMENTATION_HASWELL -static const haswell::implementation* get_haswell_singleton() { - static const haswell::implementation haswell_singleton{}; - return &haswell_singleton; -} -#endif -#if SIMDJSON_IMPLEMENTATION_WESTMERE -static const westmere::implementation* get_westmere_singleton() { - static const westmere::implementation westmere_singleton{}; - return &westmere_singleton; -} -#endif // SIMDJSON_IMPLEMENTATION_WESTMERE -#if SIMDJSON_IMPLEMENTATION_ARM64 -static const arm64::implementation* get_arm64_singleton() { - static const arm64::implementation arm64_singleton{}; - return &arm64_singleton; -} -#endif // SIMDJSON_IMPLEMENTATION_ARM64 -#if SIMDJSON_IMPLEMENTATION_PPC64 -static const ppc64::implementation* get_ppc64_singleton() { - static const ppc64::implementation ppc64_singleton{}; - return &ppc64_singleton; -} -#endif // SIMDJSON_IMPLEMENTATION_PPC64 -#if SIMDJSON_IMPLEMENTATION_FALLBACK -static const fallback::implementation* get_fallback_singleton() { - static const fallback::implementation fallback_singleton{}; - return &fallback_singleton; -} -#endif // SIMDJSON_IMPLEMENTATION_FALLBACK - -/** - * @private Detects best supported implementation on first use, and sets it - */ -class detect_best_supported_implementation_on_first_use final : public implementation { -public: - const std::string &name() const noexcept final { return set_best()->name(); } - const std::string &description() const noexcept final { return 
set_best()->description(); } - uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final { - return set_best()->create_dom_parser_implementation(capacity, max_length, dst); - } - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { - return set_best()->minify(buf, len, dst, dst_len); - } - simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override { - return set_best()->validate_utf8(buf, len); - } - simdjson_really_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} -private: - const implementation *set_best() const noexcept; -}; - -static const std::initializer_list& get_available_implementation_pointers() { - static const std::initializer_list available_implementation_pointers { -#if SIMDJSON_IMPLEMENTATION_ICELAKE - get_icelake_singleton(), -#endif -#if SIMDJSON_IMPLEMENTATION_HASWELL - get_haswell_singleton(), -#endif -#if SIMDJSON_IMPLEMENTATION_WESTMERE - get_westmere_singleton(), -#endif -#if SIMDJSON_IMPLEMENTATION_ARM64 - get_arm64_singleton(), -#endif -#if SIMDJSON_IMPLEMENTATION_PPC64 - get_ppc64_singleton(), -#endif -#if SIMDJSON_IMPLEMENTATION_FALLBACK - get_fallback_singleton(), -#endif - }; // available_implementation_pointers - return available_implementation_pointers; -} - -// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support -class unsupported_implementation final : public implementation { -public: - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t, - size_t, - std::unique_ptr& - ) const noexcept final { - return UNSUPPORTED_ARCHITECTURE; - } - 
simdjson_warn_unused error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override { - return UNSUPPORTED_ARCHITECTURE; - } - simdjson_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override { - return false; // Just refuse to validate. Given that we have a fallback implementation - // it seems unlikely that unsupported_implementation will ever be used. If it is used, - // then it will flag all strings as invalid. The alternative is to return an error_code - // from which the user has to figure out whether the string is valid UTF-8... which seems - // like a lot of work just to handle the very unlikely case that we have an unsupported - // implementation. And, when it does happen (that we have an unsupported implementation), - // what are the chances that the programmer has a fallback? Given that *we* provide the - // fallback, it implies that the programmer would need a fallback for our fallback. - } - unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} -}; - -const unsupported_implementation* get_unsupported_singleton() { - static const unsupported_implementation unsupported_singleton{}; - return &unsupported_singleton; -} - -size_t available_implementation_list::size() const noexcept { - return internal::get_available_implementation_pointers().size(); -} -const implementation * const *available_implementation_list::begin() const noexcept { - return internal::get_available_implementation_pointers().begin(); -} -const implementation * const *available_implementation_list::end() const noexcept { - return internal::get_available_implementation_pointers().end(); -} -const implementation *available_implementation_list::detect_best_supported() const noexcept { - // They are prelisted in priority order, so we just go down the list - uint32_t supported_instruction_sets = internal::detect_supported_architectures(); - for (const implementation *impl 
: internal::get_available_implementation_pointers()) { - uint32_t required_instruction_sets = impl->required_instruction_sets(); - if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; } - } - return get_unsupported_singleton(); // this should never happen? -} - -const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept { - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - char *force_implementation_name = getenv("SIMDJSON_FORCE_IMPLEMENTATION"); - SIMDJSON_POP_DISABLE_WARNINGS - - if (force_implementation_name) { - auto force_implementation = get_available_implementations()[force_implementation_name]; - if (force_implementation) { - return get_active_implementation() = force_implementation; - } else { - // Note: abort() and stderr usage within the library is forbidden. - return get_active_implementation() = get_unsupported_singleton(); - } - } - return get_active_implementation() = get_available_implementations().detect_best_supported(); -} - -} // namespace internal - -SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() { - static const internal::available_implementation_list available_implementations{}; - return available_implementations; -} - -SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() { - static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; - static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; - return active_implementation; -} - -simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { - return get_active_implementation()->minify(reinterpret_cast(buf), len, reinterpret_cast(dst), dst_len); -} 
-simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { - return get_active_implementation()->validate_utf8(buf, len); -} - -const implementation * builtin_implementation() { - static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; - assert(builtin_impl); - return builtin_impl; -} - - -} // namespace simdjson -/* end file src/implementation.cpp */ - -#if SIMDJSON_IMPLEMENTATION_ARM64 -/* begin file src/arm64/implementation.cpp */ -/* begin file include/simdjson/arm64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "arm64" -// #define SIMDJSON_IMPLEMENTATION arm64 -/* end file include/simdjson/arm64/begin.h */ - -namespace simdjson { -namespace arm64 { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace arm64 -} // namespace simdjson - -/* begin file include/simdjson/arm64/end.h */ -/* end file include/simdjson/arm64/end.h */ -/* end file src/arm64/implementation.cpp */ -/* begin file src/arm64/dom_parser_implementation.cpp */ -/* begin file include/simdjson/arm64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "arm64" -// #define SIMDJSON_IMPLEMENTATION arm64 -/* end file include/simdjson/arm64/begin.h */ - -// -// Stage 1 -// -namespace simdjson { -namespace arm64 { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - 
simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } - - uint64_t _whitespace; - uint64_t _op; -}; - -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // Functional programming causes trouble with Visual Studio. - // Keeping this version in comments since it is much nicer: - // auto v = in.map([&](simd8 chunk) { - // auto nib_lo = chunk & 0xf; - // auto nib_hi = chunk.shr<4>(); - // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - // return shuf_lo & shuf_hi; - // }); - const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - - simd8x64 v( - (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), - (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), - (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), - (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) - ); - - - // We compute whitespace and op separately. If the code later only use one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). *However* if we only need spaces, - // it is likely that we will still compute 'v' above with two lookup_16: one - // could do it a bit cheaper. This is in contrast with the x64 implementations - // where we can, efficiently, do the white space and structural matching - // separately. 
One reason for this difference is that on ARM NEON, the table - // lookups either zero or leave unchanged the characters exceeding 0xF whereas - // on x64, the equivalent instruction (pshufb) automatically applies a mask, - // ignoring the 4 most significant bits. Thus the x64 implementation is - // optimized differently. This being said, if you use this code strictly - // just for minification (or just to identify the structural characters), - // there is a small untaken optimization opportunity here. We deliberately - // do not pick it up. - - uint64_t op = simd8x64( - v.chunks[0].any_bits_set(0x7), - v.chunks[1].any_bits_set(0x7), - v.chunks[2].any_bits_set(0x7), - v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); - - uint64_t whitespace = simd8x64( - v.chunks[0].any_bits_set(0x18), - v.chunks[1].any_bits_set(0x18), - v.chunks[2].any_bits_set(0x18), - v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); - - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - simd8 bits = input.reduce_or(); - return bits.max_val() < 0b10000000u; -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1 >= uint8_t(0b11000000u); - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. - // This will work fine because we only have to report errors for cases with 0-1 lead bytes. - // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is - // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. - // The error will be detected there. 
- return is_second_byte ^ is_third_byte ^ is_fourth_byte; -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2 >= uint8_t(0b11100000u); - simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u); - return is_third_byte ^ is_fourth_byte; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace arm64 { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ 
________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - 
return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. - // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. 
- // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. - simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. 
- static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace arm64 { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
- std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : - _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string 
(everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block( - backslash, - escaped, - quote, - in_string - ); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. 
We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. 
- */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. 
-// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. 
- characters, - follows_nonquote_scalar - ); -} - -simdjson_really_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, 
block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace arm64 { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). 
If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own - // version of the code. -#ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. 
- * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - - uint64_t rev_bits = reverse_bits(bits); - int cnt = static_cast(count_ones(bits)); - int i = 0; - // Do the first 8 all together - for (; i<8; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - i = 8; - for (; i<16; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - i = 16; - while (rev_bits != 0) { - int lz = leading_zeroes(rev_bits); - this->tail[i++] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - } - } - this->tail += cnt; -#else // SIMDJSON_PREFER_REVERSE_BITS - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. 
- */ - - int cnt = static_cast(count_ones(bits)); - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; -#endif - } -#endif // SIMDJSON_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
- */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
-// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) 
- uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? 
- ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. 
- * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). 
- // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. 
- */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// - -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace arm64 { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace arm64 { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace arm64 { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// -namespace simdjson { -namespace arm64 { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no - // benefit and therefore makes things worse. - // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return arm64::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); 
-} - -} // namespace arm64 -} // namespace simdjson - -/* begin file include/simdjson/arm64/end.h */ -/* end file include/simdjson/arm64/end.h */ -/* end file src/arm64/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_FALLBACK -/* begin file src/fallback/implementation.cpp */ -/* begin file include/simdjson/fallback/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "fallback" -// #define SIMDJSON_IMPLEMENTATION fallback -/* end file include/simdjson/fallback/begin.h */ - -namespace simdjson { -namespace fallback { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace fallback -} // namespace simdjson - -/* begin file include/simdjson/fallback/end.h */ -/* end file include/simdjson/fallback/end.h */ -/* end file src/fallback/implementation.cpp */ -/* begin file src/fallback/dom_parser_implementation.cpp */ -/* begin file include/simdjson/fallback/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "fallback" -// #define SIMDJSON_IMPLEMENTATION fallback -/* end file include/simdjson/fallback/begin.h */ - -// -// Stage 1 -// -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace fallback { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. 
- * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace fallback { -namespace { -namespace stage1 { - -class structural_scanner { -public: - -simdjson_really_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) - : buf{_parser.buf}, - next_structural_index{_parser.structural_indexes.get()}, - parser{_parser}, - len{static_cast(_parser.len)}, - partial{_partial} { -} - -simdjson_really_inline void add_structural() { - *next_structural_index = idx; - next_structural_index++; -} - -simdjson_really_inline bool is_continuation(uint8_t c) { - return (c & 0b11000000) == 0b10000000; -} - -simdjson_really_inline void validate_utf8_character() { - // Continuation - if (simdjson_unlikely((buf[idx] & 0b01000000) == 0)) { - // extra continuation - error = UTF8_ERROR; - idx++; - return; - } - - // 2-byte - if ((buf[idx] & 0b00100000) == 0) { - // missing continuation - if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { - if (idx+1 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; - } - // overlong: 1100000_ 10______ - if (buf[idx] <= 0b11000001) { error = UTF8_ERROR; } - idx += 2; - return; - } - - // 3-byte - if ((buf[idx] & 0b00010000) == 0) { - // missing continuation - if (simdjson_unlikely(idx+2 > len 
|| !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { - if (idx+2 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; - } - // overlong: 11100000 100_____ ________ - if (buf[idx] == 0b11100000 && buf[idx+1] <= 0b10011111) { error = UTF8_ERROR; } - // surrogates: U+D800-U+DFFF 11101101 101_____ - if (buf[idx] == 0b11101101 && buf[idx+1] >= 0b10100000) { error = UTF8_ERROR; } - idx += 3; - return; - } - - // 4-byte - // missing continuation - if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { - if (idx+2 > len && is_streaming(partial)) { idx = len; return; } - error = UTF8_ERROR; - idx++; - return; - } - // overlong: 11110000 1000____ ________ ________ - if (buf[idx] == 0b11110000 && buf[idx+1] <= 0b10001111) { error = UTF8_ERROR; } - // too large: > U+10FFFF: - // 11110100 (1001|101_)____ - // 1111(1___|011_|0101) 10______ - // also includes 5, 6, 7 and 8 byte characters: - // 11111___ - if (buf[idx] == 0b11110100 && buf[idx+1] >= 0b10010000) { error = UTF8_ERROR; } - if (buf[idx] >= 0b11110101) { error = UTF8_ERROR; } - idx += 4; -} - -// Returns true if the string is unclosed. -simdjson_really_inline bool validate_string() { - idx++; // skip first quote - while (idx < len && buf[idx] != '"') { - if (buf[idx] == '\\') { - idx += 2; - } else if (simdjson_unlikely(buf[idx] & 0b10000000)) { - validate_utf8_character(); - } else { - if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } - idx++; - } - } - if (idx >= len) { return true; } - return false; -} - -simdjson_really_inline bool is_whitespace_or_operator(uint8_t c) { - switch (c) { - case '{': case '}': case '[': case ']': case ',': case ':': - case ' ': case '\r': case '\n': case '\t': - return true; - default: - return false; - } -} - -// -// Parse the entire input in STEP_SIZE-byte chunks. 
-// -simdjson_really_inline error_code scan() { - bool unclosed_string = false; - for (;idx 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - parser.n_structural_indexes = new_structural_indexes; - } else if(partial == stage1_mode::streaming_final) { - if(unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). - // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. 
- parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (parser.n_structural_indexes == 0) { return EMPTY; } - } else if(unclosed_string) { error = UNCLOSED_STRING; } - return error; -} - -private: - const uint8_t *buf; - uint32_t *next_structural_index; - dom_parser_implementation &parser; - uint32_t len; - uint32_t idx{0}; - error_code error{SUCCESS}; - stage1_mode partial; -}; // structural_scanner - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { - this->buf = _buf; - this->len = _len; - stage1::structural_scanner scanner(*this, partial); - return scanner.scan(); -} - -// big table for the minifier -static uint8_t jump_table[256 * 3] = { - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, - 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, - 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, - 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, - 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, -}; - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - size_t i = 0, pos = 0; - uint8_t quote = 0; - uint8_t nonescape = 1; - - while (i < len) { - unsigned char c = buf[i]; - uint8_t *meta = jump_table + 3 * c; - - quote = quote ^ (meta[0] & nonescape); - dst[pos] = c; - pos += meta[2] | quote; - - i += 1; - nonescape = uint8_t(~nonescape) | (meta[1]); - } - dst_len = pos; // we intentionally do not work with a reference - // for fear of aliasing - return quote ? 
UNCLOSED_STRING : SUCCESS; -} - -// credit: based on code from Google Fuchsia (Apache Licensed) -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - const uint8_t *data = reinterpret_cast(buf); - uint64_t pos = 0; - uint32_t code_point = 0; - while (pos < len) { - // check of the next 8 bytes are ascii. - uint64_t next_pos = pos + 16; - if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii - uint64_t v1; - memcpy(&v1, data + pos, sizeof(uint64_t)); - uint64_t v2; - memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); - uint64_t v{v1 | v2}; - if ((v & 0x8080808080808080) == 0) { - pos = next_pos; - continue; - } - } - unsigned char byte = data[pos]; - if (byte < 0b10000000) { - pos++; - continue; - } else if ((byte & 0b11100000) == 0b11000000) { - next_pos = pos + 2; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - // range check - code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); - if (code_point < 0x80 || 0x7ff < code_point) { return false; } - } else if ((byte & 0b11110000) == 0b11100000) { - next_pos = pos + 3; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } - // range check - code_point = (byte & 0b00001111) << 12 | - (data[pos + 1] & 0b00111111) << 6 | - (data[pos + 2] & 0b00111111); - if (code_point < 0x800 || 0xffff < code_point || - (0xd7ff < code_point && code_point < 0xe000)) { - return false; - } - } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000 - next_pos = pos + 4; - if (next_pos > len) { return false; } - if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; } - if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; } - // range check - code_point = - 
(byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 | - (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111); - if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } - } else { - // we may have a continuation - return false; - } - pos = next_pos; - } - return true; -} - -} // namespace fallback -} // namespace simdjson - -// -// Stage 2 -// -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace fallback { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace fallback { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace fallback { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -namespace simdjson { -namespace fallback { - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); -} - -} // namespace fallback -} // namespace simdjson - -/* begin file include/simdjson/fallback/end.h */ -/* end file include/simdjson/fallback/end.h */ -/* end file src/fallback/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_ICELAKE -/* begin file src/icelake/implementation.cpp */ -/* begin file include/simdjson/icelake/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "icelake" -// #define SIMDJSON_IMPLEMENTATION icelake -SIMDJSON_TARGET_ICELAKE -/* end file include/simdjson/icelake/begin.h */ - -namespace simdjson { -namespace icelake { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace icelake -} // namespace simdjson - -/* begin file include/simdjson/icelake/end.h */ -SIMDJSON_UNTARGET_ICELAKE -/* end file 
include/simdjson/icelake/end.h */ - -/* end file src/icelake/implementation.cpp */ -/* begin file src/icelake/dom_parser_implementation.cpp */ -/* begin file include/simdjson/icelake/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "icelake" -// #define SIMDJSON_IMPLEMENTATION icelake -SIMDJSON_TARGET_ICELAKE -/* end file include/simdjson/icelake/begin.h */ - -// -// Stage 1 -// - -namespace simdjson { -namespace icelake { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; - // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; - // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() const noexcept; - - uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') - uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) -}; - -simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } -simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } -simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } - -// This identifies structural characters (comma, colon, braces, brackets), -// and ASCII white-space ('\r','\n','\t',' '). -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. 
- const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - - // The 6 operators (:,[]{}) have these values: - // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D - // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). - // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. - // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); - - // We compute whitespace and op separately. If later code only uses one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). 
- - const uint64_t whitespace = in.eq({ - _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20 - }; - const uint64_t op = curlified.eq({ - _mm512_shuffle_epi8(op_table, in.chunks[0]) - }); - - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
- return simd8(is_third_byte | is_fourth_byte) > int8_t(0); -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace icelake { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
- // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. 
- simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -// defining SIMDJSON_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write -#define SIMDJSON_CUSTOM_BIT_INDEXER -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace icelake { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. 
- */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
- std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : - _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of 
string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block( - backslash, - escaped, - quote, - in_string - ); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. 
- * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. 
- */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. 
-// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. 
- characters, - follows_nonquote_scalar - ); -} - -simdjson_really_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, 
block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace icelake { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). 
If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own - // version of the code. -#ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. 
- * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - - uint64_t rev_bits = reverse_bits(bits); - int cnt = static_cast(count_ones(bits)); - int i = 0; - // Do the first 8 all together - for (; i<8; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - i = 8; - for (; i<16; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - i = 16; - while (rev_bits != 0) { - int lz = leading_zeroes(rev_bits); - this->tail[i++] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - } - } - this->tail += cnt; -#else // SIMDJSON_PREFER_REVERSE_BITS - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. 
- */ - - int cnt = static_cast(count_ones(bits)); - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; -#endif - } -#endif // SIMDJSON_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
- */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
-// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) 
- uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? 
- ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. 
- * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). 
- // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -// We must not forget to undefine it now: -#undef SIMDJSON_CUSTOM_BIT_INDEXER - -/** - * We provide a custom version of bit_indexer::write using - * naked intrinsics. - * TODO: make this code more elegant. 
- */ -// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. -// as a workaround, we disable warnings within the following function. -SIMDJSON_PUSH_DISABLE_ALL_WARNINGS -namespace simdjson { namespace icelake { namespace { namespace stage1 { -simdjson_really_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) { return; } - - const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( - 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, - 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, - 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, - 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 - )); - const __m512i start_index = _mm512_set1_epi32(idx); - - const auto count = count_ones(bits); - __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); - _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); - - if(count > 16) { - const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); - _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); - if(count > 32) { - const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); - _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); - if(count > 48) { - const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); - _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); - } - } - } - this->tail += count; -} -}}}} -SIMDJSON_POP_DISABLE_WARNINGS - -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. 
- */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace icelake { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace icelake { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace icelake { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// -namespace simdjson { -namespace icelake { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return icelake::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); -} - -} // namespace icelake -} // namespace simdjson - -/* begin file include/simdjson/icelake/end.h */ -SIMDJSON_UNTARGET_ICELAKE -/* 
end file include/simdjson/icelake/end.h */ -/* end file src/icelake/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_HASWELL -/* begin file src/haswell/implementation.cpp */ -/* begin file include/simdjson/haswell/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "haswell" -// #define SIMDJSON_IMPLEMENTATION haswell -SIMDJSON_TARGET_HASWELL -/* end file include/simdjson/haswell/begin.h */ - -namespace simdjson { -namespace haswell { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace haswell -} // namespace simdjson - -/* begin file include/simdjson/haswell/end.h */ -SIMDJSON_UNTARGET_HASWELL -/* end file include/simdjson/haswell/end.h */ - -/* end file src/haswell/implementation.cpp */ -/* begin file src/haswell/dom_parser_implementation.cpp */ -/* begin file include/simdjson/haswell/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "haswell" -// #define SIMDJSON_IMPLEMENTATION haswell -SIMDJSON_TARGET_HASWELL -/* end file include/simdjson/haswell/begin.h */ - -// -// Stage 1 -// - -namespace simdjson { -namespace haswell { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - // ASCII white-space ('\r','\n','\t',' ') - simdjson_really_inline uint64_t whitespace() const noexcept; - // non-quote structural characters (comma, colon, braces, brackets) - simdjson_really_inline uint64_t op() const noexcept; - // neither a structural character nor a white-space, so letters, numbers and quotes - simdjson_really_inline uint64_t scalar() const noexcept; - - uint64_t 
_whitespace; // ASCII white-space ('\r','\n','\t',' ') - uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) -}; - -simdjson_really_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } -simdjson_really_inline uint64_t json_character_block::op() const noexcept { return _op; } -simdjson_really_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } - -// This identifies structural characters (comma, colon, braces, brackets), -// and ASCII white-space ('\r','\n','\t',' '). -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. - const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - - // The 6 operators (:,[]{}) have these values: - // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D - // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). - // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. - // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); - - // We compute whitespace and op separately. 
If later code only uses one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). - - const uint64_t whitespace = in.eq({ - _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), - _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20, - in.chunks[1] | 0x20 - }; - const uint64_t op = curlified.eq({ - _mm256_shuffle_epi8(op_table, in.chunks[0]), - _mm256_shuffle_epi8(op_table, in.chunks[1]) - }); - - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
- return simd8(is_third_byte | is_fourth_byte) > int8_t(0); -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace haswell { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
- // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. 
- simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace haswell { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
- std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : - _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of 
string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block( - backslash, - escaped, - quote, - in_string - ); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. 
- * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. 
- */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. 
-// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. 
- characters, - follows_nonquote_scalar - ); -} - -simdjson_really_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, 
block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace haswell { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). 
If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own - // version of the code. -#ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. 
- * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - - uint64_t rev_bits = reverse_bits(bits); - int cnt = static_cast(count_ones(bits)); - int i = 0; - // Do the first 8 all together - for (; i<8; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - i = 8; - for (; i<16; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - i = 16; - while (rev_bits != 0) { - int lz = leading_zeroes(rev_bits); - this->tail[i++] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - } - } - this->tail += cnt; -#else // SIMDJSON_PREFER_REVERSE_BITS - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. 
- */ - - int cnt = static_cast(count_ones(bits)); - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; -#endif - } -#endif // SIMDJSON_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
- */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
-// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) 
- uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? 
- ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. 
- * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). 
- // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. 
- */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace haswell { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace haswell { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace haswell { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// -namespace simdjson { -namespace haswell { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return haswell::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); -} - -} // namespace haswell -} // namespace simdjson - -/* begin file include/simdjson/haswell/end.h */ -SIMDJSON_UNTARGET_HASWELL -/* 
end file include/simdjson/haswell/end.h */ -/* end file src/haswell/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_PPC64 -/* begin file src/ppc64/implementation.cpp */ -/* begin file include/simdjson/ppc64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "ppc64" -// #define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -namespace simdjson { -namespace ppc64 { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace ppc64 -} // namespace simdjson - -/* begin file include/simdjson/ppc64/end.h */ -/* end file include/simdjson/ppc64/end.h */ -/* end file src/ppc64/implementation.cpp */ -/* begin file src/ppc64/dom_parser_implementation.cpp */ -/* begin file include/simdjson/ppc64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "ppc64" -// #define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -// -// Stage 1 -// -namespace simdjson { -namespace ppc64 { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } - - uint64_t _whitespace; - uint64_t _op; -}; - -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); - const simd8 table2(8, 0, 18, 4, 0, 1, 
0, 1, 0, 0, 0, 3, 2, 1, 0, 0); - - simd8x64 v( - (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), - (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), - (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), - (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) - ); - - uint64_t op = simd8x64( - v.chunks[0].any_bits_set(0x7), - v.chunks[1].any_bits_set(0x7), - v.chunks[2].any_bits_set(0x7), - v.chunks[3].any_bits_set(0x7) - ).to_bitmask(); - - uint64_t whitespace = simd8x64( - v.chunks[0].any_bits_set(0x18), - v.chunks[1].any_bits_set(0x18), - v.chunks[2].any_bits_set(0x18), - v.chunks[3].any_bits_set(0x18) - ).to_bitmask(); - - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - // careful: 0x80 is not ascii. - return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere(); -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
- return simd8(is_third_byte | is_fourth_byte) > int8_t(0); -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all 
have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
- // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. 
- simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace ppc64 { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
- std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : - _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of string 
(everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block( - backslash, - escaped, - quote, - in_string - ); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. - * 2. 
We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. 
- */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. 
-// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. 
- characters, - follows_nonquote_scalar - ); -} - -simdjson_really_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, 
block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace ppc64 { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). 
If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own - // version of the code. -#ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. 
- * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - - uint64_t rev_bits = reverse_bits(bits); - int cnt = static_cast(count_ones(bits)); - int i = 0; - // Do the first 8 all together - for (; i<8; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - i = 8; - for (; i<16; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - i = 16; - while (rev_bits != 0) { - int lz = leading_zeroes(rev_bits); - this->tail[i++] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - } - } - this->tail += cnt; -#else // SIMDJSON_PREFER_REVERSE_BITS - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. 
- */ - - int cnt = static_cast(count_ones(bits)); - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; -#endif - } -#endif // SIMDJSON_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
- */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
-// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) 
- uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? 
- ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. 
- * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). 
- // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. 
- */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// - -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace ppc64 { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// -namespace simdjson { -namespace ppc64 { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no - // benefit and therefore makes things worse. - // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return ppc64::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); 
-} - -} // namespace ppc64 -} // namespace simdjson - -/* begin file include/simdjson/ppc64/end.h */ -/* end file include/simdjson/ppc64/end.h */ -/* end file src/ppc64/dom_parser_implementation.cpp */ -#endif -#if SIMDJSON_IMPLEMENTATION_WESTMERE -/* begin file src/westmere/implementation.cpp */ -/* begin file include/simdjson/westmere/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "westmere" -// #define SIMDJSON_IMPLEMENTATION westmere -SIMDJSON_TARGET_WESTMERE -/* end file include/simdjson/westmere/begin.h */ - -namespace simdjson { -namespace westmere { - -simdjson_warn_unused error_code implementation::create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr& dst -) const noexcept { - dst.reset( new (std::nothrow) dom_parser_implementation() ); - if (!dst) { return MEMALLOC; } - if (auto err = dst->set_capacity(capacity)) - return err; - if (auto err = dst->set_max_depth(max_depth)) - return err; - return SUCCESS; -} - -} // namespace westmere -} // namespace simdjson - -/* begin file include/simdjson/westmere/end.h */ -SIMDJSON_UNTARGET_WESTMERE -/* end file include/simdjson/westmere/end.h */ -/* end file src/westmere/implementation.cpp */ -/* begin file src/westmere/dom_parser_implementation.cpp */ -/* begin file include/simdjson/westmere/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "westmere" -// #define SIMDJSON_IMPLEMENTATION westmere -SIMDJSON_TARGET_WESTMERE -/* end file include/simdjson/westmere/begin.h */ - -// -// Stage 1 -// - -namespace simdjson { -namespace westmere { -namespace { - -using namespace simd; - -struct json_character_block { - static simdjson_really_inline json_character_block classify(const simd::simd8x64& in); - - simdjson_really_inline uint64_t whitespace() const noexcept { return _whitespace; } - simdjson_really_inline uint64_t op() const noexcept { return _op; } - simdjson_really_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } - - uint64_t _whitespace; - 
uint64_t _op; -}; - -simdjson_really_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { - // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why - // we can't use the generic lookup_16. - auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); - - // The 6 operators (:,[]{}) have these values: - // - // , 2C - // : 3A - // [ 5B - // { 7B - // ] 5D - // } 7D - // - // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. - // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then - // match it (against | 0x20). - // - // To prevent recognizing other characters, everything else gets compared with 0, which cannot - // match due to the | 0x20. - // - // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , - // and :. This gets caught in stage 2, which checks the actual character to ensure the right - // operators are in the right places. - const auto op_table = simd8::repeat_16( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B - ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D - ); - - // We compute whitespace and op separately. If the code later only use one or the - // other, given the fact that all functions are aggressively inlined, we can - // hope that useless computations will be omitted. This is namely case when - // minifying (we only need whitespace). 
- - - const uint64_t whitespace = in.eq({ - _mm_shuffle_epi8(whitespace_table, in.chunks[0]), - _mm_shuffle_epi8(whitespace_table, in.chunks[1]), - _mm_shuffle_epi8(whitespace_table, in.chunks[2]), - _mm_shuffle_epi8(whitespace_table, in.chunks[3]) - }); - // Turn [ and ] into { and } - const simd8x64 curlified{ - in.chunks[0] | 0x20, - in.chunks[1] | 0x20, - in.chunks[2] | 0x20, - in.chunks[3] | 0x20 - }; - const uint64_t op = curlified.eq({ - _mm_shuffle_epi8(op_table, in.chunks[0]), - _mm_shuffle_epi8(op_table, in.chunks[1]), - _mm_shuffle_epi8(op_table, in.chunks[2]), - _mm_shuffle_epi8(op_table, in.chunks[3]) - }); - return { whitespace, op }; -} - -simdjson_really_inline bool is_ascii(const simd8x64& input) { - return input.reduce_or().is_ascii(); -} - -simdjson_unused simdjson_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { - simd8 is_second_byte = prev1.saturating_sub(0b11000000u-1); // Only 11______ will be > 0 - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. - return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); -} - -simdjson_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { - simd8 is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0 - simd8 is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0 - // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
- return simd8(is_third_byte | is_fourth_byte) > int8_t(0); -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ -namespace simdjson { -namespace westmere { -namespace { -namespace utf8_validation { - -using namespace simd; - - simdjson_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { -// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) -// Bit 1 = Too Long (ASCII followed by continuation) -// Bit 2 = Overlong 3-byte -// Bit 4 = Surrogate -// Bit 5 = Overlong 2-byte -// Bit 7 = Two Continuations - constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ - // 11______ 11______ - constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ - constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ - constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ - constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ - constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ - constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ - // 11110100 101_____ - // 11110101 1001____ - // 11110101 101_____ - // 1111011_ 1001____ - // 1111011_ 101_____ - // 11111___ 1001____ - // 11111___ 101_____ - constexpr const uint8_t TOO_LARGE_1000 = 1<<6; - // 11110101 1000____ - // 1111011_ 1000____ - // 11111___ 1000____ - constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ - - const simd8 byte_1_high = prev1.shr<4>().lookup_16( - // 0_______ ________ - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, - // 10______ ________ - TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, - // 1100____ ________ - TOO_SHORT | OVERLONG_2, - // 1101____ ________ - TOO_SHORT, - // 1110____ ________ - TOO_SHORT | OVERLONG_3 | SURROGATE, - // 1111____ ________ - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 - ); - constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These 
all have ____ in byte 1 . - const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( - // ____0000 ________ - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - // ____0001 ________ - CARRY | OVERLONG_2, - // ____001_ ________ - CARRY, - CARRY, - - // ____0100 ________ - CARRY | TOO_LARGE, - // ____0101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____011_ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - - // ____1___ ________ - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - // ____1101 ________ - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 - ); - const simd8 byte_2_high = input.shr<4>().lookup_16( - // ________ 0_______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, - - // ________ 1000____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - // ________ 1001____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - // ________ 101_____ - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - - // ________ 11______ - TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT - ); - return (byte_1_high & byte_1_low & byte_2_high); - } - simdjson_really_inline simd8 check_multibyte_lengths(const simd8 input, - const simd8 prev_input, const simd8 sc) { - simd8 prev2 = input.prev<2>(prev_input); - simd8 prev3 = input.prev<3>(prev_input); - simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); - simd8 must23_80 = must23 & uint8_t(0x80); - return must23_80 ^ sc; - } - - // - // Return nonzero if there are incomplete multibyte characters at the end of the block: - // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
- // - simdjson_really_inline simd8 is_incomplete(const simd8 input) { - // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): - // ... 1111____ 111_____ 11______ -#if SIMDJSON_IMPLEMENTATION_ICELAKE - static const uint8_t max_array[64] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#else - static const uint8_t max_array[32] = { - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1 - }; -#endif - const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); - return input.gt_bits(max_value); - } - - struct utf8_checker { - // If this is nonzero, there has been a UTF-8 error. - simd8 error; - // The last input we received - simd8 prev_input_block; - // Whether the last input we received was incomplete (used for ASCII fast path) - simd8 prev_incomplete; - - // - // Check whether the current bytes are valid UTF-8. - // - simdjson_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { - // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes - // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) - simd8 prev1 = input.prev<1>(prev_input); - simd8 sc = check_special_cases(input, prev1); - this->error |= check_multibyte_lengths(input, prev_input, sc); - } - - // The only problem that can happen at EOF is that a multibyte character is too short - // or a byte value too large in the last bytes: check_special_cases only checks for bytes - // too large in the first of two bytes. 
- simdjson_really_inline void check_eof() { - // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't - // possibly finish them. - this->error |= this->prev_incomplete; - } - - simdjson_really_inline void check_next_input(const simd8x64& input) { - if(simdjson_likely(is_ascii(input))) { - this->error |= this->prev_incomplete; - } else { - // you might think that a for-loop would work, but under Visual Studio, it is not good enough. - static_assert((simd8x64::NUM_CHUNKS == 1) - ||(simd8x64::NUM_CHUNKS == 2) - || (simd8x64::NUM_CHUNKS == 4), - "We support one, two or four chunks per 64-byte block."); - if(simd8x64::NUM_CHUNKS == 1) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - } if(simd8x64::NUM_CHUNKS == 2) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - } else if(simd8x64::NUM_CHUNKS == 4) { - this->check_utf8_bytes(input.chunks[0], this->prev_input_block); - this->check_utf8_bytes(input.chunks[1], input.chunks[0]); - this->check_utf8_bytes(input.chunks[2], input.chunks[1]); - this->check_utf8_bytes(input.chunks[3], input.chunks[2]); - } - this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); - this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; - } - } - // do not forget to call check_eof! - simdjson_really_inline error_code errors() { - return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; - } - - }; // struct utf8_checker -} // namespace utf8_validation - -using utf8_validation::utf8_checker; - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ -/* begin file src/generic/stage1/json_structural_indexer.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -/* begin file src/generic/stage1/buf_block_reader.h */ -namespace simdjson { -namespace westmere { -namespace { - -// Walks through a buffer in block-sized increments, loading the last part with spaces -template -struct buf_block_reader { -public: - simdjson_really_inline buf_block_reader(const uint8_t *_buf, size_t _len); - simdjson_really_inline size_t block_index(); - simdjson_really_inline bool has_full_block() const; - simdjson_really_inline const uint8_t *full_block() const; - /** - * Get the last block, padded with spaces. - * - * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this - * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there - * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. - * - * @return the number of effective characters in the last block. - */ - simdjson_really_inline size_t get_remainder(uint8_t *dst) const; - simdjson_really_inline void advance(); -private: - const uint8_t *buf; - const size_t len; - const size_t lenminusstep; - size_t idx; -}; - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text_64(const uint8_t *text) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i); i++) { - buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -// Routines to print masks and text for debugging bitmask operations -simdjson_unused static char * format_input_text(const simd8x64& in) { - static char buf[sizeof(simd8x64) + 1]; - in.store(reinterpret_cast(buf)); - for (size_t i=0; i); i++) { - if (buf[i] < ' ') { buf[i] = '_'; } - } - buf[sizeof(simd8x64)] = '\0'; - return buf; -} - -simdjson_unused static char * format_mask(uint64_t mask) { - static char buf[sizeof(simd8x64) + 1]; - for (size_t i=0; i<64; i++) { - buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; - } - buf[64] = '\0'; - return buf; -} - -template -simdjson_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} - -template -simdjson_really_inline size_t buf_block_reader::block_index() { return idx; } - -template -simdjson_really_inline bool buf_block_reader::has_full_block() const { - return idx < lenminusstep; -} - -template -simdjson_really_inline const uint8_t *buf_block_reader::full_block() const { - return &buf[idx]; -} - -template -simdjson_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { - if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers - std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
- std::memcpy(dst, buf + idx, len - idx); - return len - idx; -} - -template -simdjson_really_inline void buf_block_reader::advance() { - idx += STEP_SIZE; -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/buf_block_reader.h */ -/* begin file src/generic/stage1/json_string_scanner.h */ -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -struct json_string_block { - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : - _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} - - // Escaped characters (characters following an escape() character) - simdjson_really_inline uint64_t escaped() const { return _escaped; } - // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) - simdjson_really_inline uint64_t escape() const { return _backslash & ~_escaped; } - // Real (non-backslashed) quotes - simdjson_really_inline uint64_t quote() const { return _quote; } - // Start quotes of strings - simdjson_really_inline uint64_t string_start() const { return _quote & _in_string; } - // End quotes of strings - simdjson_really_inline uint64_t string_end() const { return _quote & ~_in_string; } - // Only characters inside the string (not including the quotes) - simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } - // Return a mask of whether the given characters are inside a string (only works on non-quotes) - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } - // Tail of 
string (everything except the start quote) - simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } - - // backslash characters - uint64_t _backslash; - // escaped characters (backslashed--does not include the hex characters after \u) - uint64_t _escaped; - // real quotes (non-backslashed ones) - uint64_t _quote; - // string characters (includes start quote but not end quote) - uint64_t _in_string; -}; - -// Scans blocks for string characters, storing the state necessary to do so -class json_string_scanner { -public: - simdjson_really_inline json_string_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Intended to be defined by the implementation - simdjson_really_inline uint64_t find_escaped(uint64_t escape); - simdjson_really_inline uint64_t find_escaped_branchless(uint64_t escape); - - // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). - uint64_t prev_in_string = 0ULL; - // Whether the first character of the next iteration is escaped. - uint64_t prev_escaped = 0ULL; -}; - -// -// Finds escaped characters (characters following \). -// -// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). -// -// Does this by: -// - Shift the escape mask to get potentially escaped characters (characters after backslashes). -// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) -// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) -// -// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all -// escape sequences, filters out the ones that start on even bits, and adds that to the mask of -// escape sequences. 
This causes the addition to clear out the sequences starting on odd bits (since -// the start bit causes a carry), and leaves even-bit sequences alone. -// -// Example: -// -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape -// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape -// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later -// invert_mask | | cxxx c xx c| even_seq << 1 -// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit -// escaped | x | x x x x x x x x | -// desired | x | x x x x x x x x | -// text | \\\ | \\\"\\\" \\\" \\"\\" | -// -simdjson_really_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { - // If there was overflow, pretend the first character isn't a backslash - backslash &= ~prev_escaped; - uint64_t follows_escape = backslash << 1 | prev_escaped; - - // Get sequences starting on even bits by clearing out the odd series using + - const uint64_t even_bits = 0x5555555555555555ULL; - uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; - uint64_t sequences_starting_on_even_bits; - prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); - uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. - - // Mask every other backslashed character as an escaped character - // Flip the mask for sequences that start on even bits, to correct them - return (even_bits ^ invert_mask) & follows_escape; -} - -// -// Return a mask of all string characters plus end quotes. -// -// prev_escaped is overflow saying whether the next character is escaped. -// prev_in_string is overflow saying whether we're still in a string. -// -// Backslash sequences outside of quotes will be detected in stage 2. 
-// -simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { - const uint64_t backslash = in.eq('\\'); - const uint64_t escaped = find_escaped(backslash); - const uint64_t quote = in.eq('"') & ~escaped; - - // - // prefix_xor flips on bits inside the string (and flips off the end quote). - // - // Then we xor with prev_in_string: if we were in a string already, its effect is flipped - // (characters inside strings are outside, and characters outside strings are inside). - // - const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; - - // - // Check if we're still in a string at the end of the box so the next block will know - // - // right shift of a signed value expected to be well-defined and standard - // compliant as of C++20, John Regher from Utah U. says this is fine code - // - prev_in_string = uint64_t(static_cast(in_string) >> 63); - - // Use ^ to turn the beginning quote off, and the end quote on. - - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_string_block( - backslash, - escaped, - quote, - in_string - ); -} - -simdjson_really_inline error_code json_string_scanner::finish() { - if (prev_in_string) { - return UNCLOSED_STRING; - } - return SUCCESS; -} - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/json_string_scanner.h */ -/* begin file src/generic/stage1/json_scanner.h */ -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -/** - * A block of scanned json, with information on operators and scalars. - * - * We seek to identify pseudo-structural characters. Anything that is inside - * a string must be omitted (hence & ~_string.string_tail()). - * Otherwise, pseudo-structural characters come in two forms. - * 1. We have the structural characters ([,],{,},:, comma). The - * term 'structural character' is from the JSON RFC. 
- * 2. We have the 'scalar pseudo-structural characters'. - * Scalars are quotes, and any character except structural characters and white space. - * - * To identify the scalar pseudo-structural characters, we must look at what comes - * before them: it must be a space, a quote or a structural characters. - * Starting with simdjson v0.3, we identify them by - * negation: we identify everything that is followed by a non-quote scalar, - * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. - */ -struct json_block { -public: - // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 - simdjson_really_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - simdjson_really_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : - _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} - - /** - * The start of structurals. - * In simdjson prior to v0.3, these were called the pseudo-structural characters. - **/ - simdjson_really_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } - /** All JSON whitespace (i.e. 
not in a string) */ - simdjson_really_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } - - // Helpers - - /** Whether the given characters are inside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } - /** Whether the given characters are outside a string (only works on non-quotes) */ - simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } - - // string and escape characters - json_string_block _string; - // whitespace, structural characters ('operators'), scalars - json_character_block _characters; - // whether the previous character was a scalar - uint64_t _follows_potential_nonquote_scalar; -private: - // Potential structurals (i.e. disregarding strings) - - /** - * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". - * They may reside inside a string. - **/ - simdjson_really_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } - /** - * The start of non-operator runs, like 123, true and "abc". - * It main reside inside a string. - **/ - simdjson_really_inline uint64_t potential_scalar_start() const noexcept { - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space - // then we know that it is irrelevant structurally. - return _characters.scalar() & ~follows_potential_scalar(); - } - /** - * Whether the given character is immediately after a non-operator like 123, true. - * The characters following a quote are not included. 
- */ - simdjson_really_inline uint64_t follows_potential_scalar() const noexcept { - // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character - // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a - // white space. - // It is understood that within quoted region, anything at all could be marked (irrelevant). - return _follows_potential_nonquote_scalar; - } -}; - -/** - * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. - * - * The scanner starts by calculating two distinct things: - * - string characters (taking \" into account) - * - structural characters or 'operators' ([]{},:, comma) - * and scalars (runs of non-operators like 123, true and "abc") - * - * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: - * in particular, the operator/scalar bit will find plenty of things that are actually part of - * strings. When we're done, json_block will fuse the two together by masking out tokens that are - * part of a string. - */ -class json_scanner { -public: - json_scanner() {} - simdjson_really_inline json_block next(const simd::simd8x64& in); - // Returns either UNCLOSED_STRING or SUCCESS - simdjson_really_inline error_code finish(); - -private: - // Whether the last character of the previous iteration is part of a scalar token - // (anything except whitespace or a structural character/'operator'). - uint64_t prev_scalar = 0ULL; - json_string_scanner string_scanner{}; -}; - - -// -// Check if the current character immediately follows a matching character. 
-// -// For example, this checks for quotes with backslashes in front of them: -// -// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); -// -simdjson_really_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { - const uint64_t result = match << 1 | overflow; - overflow = match >> 63; - return result; -} - -simdjson_really_inline json_block json_scanner::next(const simd::simd8x64& in) { - json_string_block strings = string_scanner.next(in); - // identifies the white-space and the structural characters - json_character_block characters = json_character_block::classify(in); - // The term "scalar" refers to anything except structural characters and white space - // (so letters, numbers, quotes). - // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). - // - // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) - // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential - // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we - // may need to add an extra check when parsing strings. - // - // Performance: there are many ways to skin this cat. - const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); - uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); - // We are returning a function-local object so either we get a move constructor - // or we get copy elision. - return json_block( - strings,// strings is a function-local object so either it moves or the copy is elided. 
- characters, - follows_nonquote_scalar - ); -} - -simdjson_really_inline error_code json_scanner::finish() { - return string_scanner.finish(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/json_scanner.h */ -/* begin file src/generic/stage1/json_minifier.h */ -// This file contains the common code every implementation uses in stage1 -// It is intended to be included multiple times and compiled multiple times -// We assume the file in which it is included already includes -// "simdjson/stage1.h" (this simplifies amalgation) - -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -class json_minifier { -public: - template - static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; - -private: - simdjson_really_inline json_minifier(uint8_t *_dst) - : dst{_dst} - {} - template - simdjson_really_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block); - simdjson_really_inline error_code finish(uint8_t *dst_start, size_t &dst_len); - json_scanner scanner{}; - uint8_t *dst; -}; - -simdjson_really_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { - uint64_t mask = block.whitespace(); - dst += in.compress(mask, dst); -} - -simdjson_really_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { - error_code error = scanner.finish(); - if (error) { dst_len = 0; return error; } - dst_len = dst - dst_start; - return SUCCESS; -} - -template<> -simdjson_really_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - simd::simd8x64 in_2(block_buf+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1); - this->next(in_2, 
block_2); - reader.advance(); -} - -template<> -simdjson_really_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block_buf); - json_block block_1 = scanner.next(in_1); - this->next(block_buf, block_1); - reader.advance(); -} - -template -error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { - buf_block_reader reader(buf, len); - json_minifier minifier(dst); - - // Index the first n-1 blocks - while (reader.has_full_block()) { - minifier.step(reader.full_block(), reader); - } - - // Index the last (remainder) block, padded with spaces - uint8_t block[STEP_SIZE]; - size_t remaining_bytes = reader.get_remainder(block); - if (remaining_bytes > 0) { - // We do not want to write directly to the output stream. Rather, we write - // to a local buffer (for safety). - uint8_t out_block[STEP_SIZE]; - uint8_t * const guarded_dst{minifier.dst}; - minifier.dst = out_block; - minifier.step(block, reader); - size_t to_write = minifier.dst - out_block; - // In some cases, we could be enticed to consider the padded spaces - // as part of the string. This is fine as long as we do not write more - // than we consumed. - if(to_write > remaining_bytes) { to_write = remaining_bytes; } - memcpy(guarded_dst, out_block, to_write); - minifier.dst = guarded_dst + to_write; - } - return minifier.finish(dst, dst_len); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/json_minifier.h */ -/* begin file src/generic/stage1/find_next_document_index.h */ -namespace simdjson { -namespace westmere { -namespace { - -/** - * This algorithm is used to quickly identify the last structural position that - * makes up a complete document. - * - * It does this by going backwards and finding the last *document boundary* (a - * place where one value follows another without a comma between them). 
If the - * last document (the characters after the boundary) has an equal number of - * start and end brackets, it is considered complete. - * - * Simply put, we iterate over the structural characters, starting from - * the end. We consider that we found the end of a JSON document when the - * first element of the pair is NOT one of these characters: '{' '[' ':' ',' - * and when the second element is NOT one of these characters: '}' ']' ':' ','. - * - * This simple comparison works most of the time, but it does not cover cases - * where the batch's structural indexes contain a perfect amount of documents. - * In such a case, we do not have access to the structural index which follows - * the last document, therefore, we do not have access to the second element in - * the pair, and that means we cannot identify the last document. To fix this - * issue, we keep a count of the open and closed curly/square braces we found - * while searching for the pair. When we find a pair AND the count of open and - * closed curly/square braces is the same, we know that we just passed a - * complete document, therefore the last json buffer location is the end of the - * batch. - */ -simdjson_really_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { - // Variant: do not count separately, just figure out depth - if(parser.n_structural_indexes == 0) { return 0; } - auto arr_cnt = 0; - auto obj_cnt = 0; - for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { - auto idxb = parser.structural_indexes[i]; - switch (parser.buf[idxb]) { - case ':': - case ',': - continue; - case '}': - obj_cnt--; - continue; - case ']': - arr_cnt--; - continue; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - auto idxa = parser.structural_indexes[i - 1]; - switch (parser.buf[idxa]) { - case '{': - case '[': - case ':': - case ',': - continue; - } - // Last document is complete, so the next document will appear after! 
- if (!arr_cnt && !obj_cnt) { - return parser.n_structural_indexes; - } - // Last document is incomplete; mark the document at i + 1 as the next one - return i; - } - // If we made it to the end, we want to finish counting to see if we have a full document. - switch (parser.buf[parser.structural_indexes[0]]) { - case '}': - obj_cnt--; - break; - case ']': - arr_cnt--; - break; - case '{': - obj_cnt++; - break; - case '[': - arr_cnt++; - break; - } - if (!arr_cnt && !obj_cnt) { - // We have a complete document. - return parser.n_structural_indexes; - } - return 0; -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/find_next_document_index.h */ - -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -class bit_indexer { -public: - uint32_t *tail; - - simdjson_really_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} - - // flatten out values in 'bits' assuming that they are are to have values of idx - // plus their position in the bitvector, and store these indexes at - // base_ptr[base] incrementing base as we go - // will potentially store extra values beyond end of valid bits, so base_ptr - // needs to be large enough to handle this - // - // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own - // version of the code. -#ifdef SIMDJSON_CUSTOM_BIT_INDEXER - simdjson_really_inline void write(uint32_t idx, uint64_t bits); -#else - simdjson_really_inline void write(uint32_t idx, uint64_t bits) { - // In some instances, the next branch is expensive because it is mispredicted. - // Unfortunately, in other cases, - // it helps tremendously. - if (bits == 0) - return; -#if defined(SIMDJSON_PREFER_REVERSE_BITS) - /** - * ARM lacks a fast trailing zero instruction, but it has a fast - * bit reversal instruction and a fast leading zero instruction. 
- * Thus it may be profitable to reverse the bits (once) and then - * to rely on a sequence of instructions that call the leading - * zero instruction. - * - * Performance notes: - * The chosen routine is not optimal in terms of data dependency - * since zero_leading_bit might require two instructions. However, - * it tends to minimize the total number of instructions which is - * beneficial. - */ - - uint64_t rev_bits = reverse_bits(bits); - int cnt = static_cast(count_ones(bits)); - int i = 0; - // Do the first 8 all together - for (; i<8; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - i = 8; - for (; i<16; i++) { - int lz = leading_zeroes(rev_bits); - this->tail[i] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - i = 16; - while (rev_bits != 0) { - int lz = leading_zeroes(rev_bits); - this->tail[i++] = static_cast(idx) + lz; - rev_bits = zero_leading_bit(rev_bits, lz); - } - } - } - this->tail += cnt; -#else // SIMDJSON_PREFER_REVERSE_BITS - /** - * Under recent x64 systems, we often have both a fast trailing zero - * instruction and a fast 'clear-lower-bit' instruction so the following - * algorithm can be competitive. 
- */ - - int cnt = static_cast(count_ones(bits)); - // Do the first 8 all together - for (int i=0; i<8; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Do the next 8 all together (we hope in most cases it won't happen at all - // and the branch is easily predicted). - if (simdjson_unlikely(cnt > 8)) { - for (int i=8; i<16; i++) { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - } - - // Most files don't have 16+ structurals per block, so we take several basically guaranteed - // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) - // or the start of a value ("abc" true 123) every four characters. - if (simdjson_unlikely(cnt > 16)) { - int i = 16; - do { - this->tail[i] = idx + trailing_zeroes(bits); - bits = clear_lowest_bit(bits); - i++; - } while (i < cnt); - } - } - - this->tail += cnt; -#endif - } -#endif // SIMDJSON_CUSTOM_BIT_INDEXER - -}; - -class json_structural_indexer { -public: - /** - * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. - * - * @param partial Setting the partial parameter to true allows the find_structural_bits to - * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If - * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
- */ - template - static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; - -private: - simdjson_really_inline json_structural_indexer(uint32_t *structural_indexes); - template - simdjson_really_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; - simdjson_really_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); - simdjson_really_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); - - json_scanner scanner{}; - utf8_checker checker{}; - bit_indexer indexer; - uint64_t prev_structurals = 0; - uint64_t unescaped_chars_error = 0; -}; - -simdjson_really_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} - -// Skip the last character if it is partial -simdjson_really_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { - if (simdjson_unlikely(len < 3)) { - switch (len) { - case 2: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 2 bytes left - return len; - case 1: - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - return len; - case 0: - return len; - } - } - if (buf[len-1] >= 0b11000000) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left - if (buf[len-2] >= 0b11100000) { return len-2; } // 3- and 4-byte characters with only 1 byte left - if (buf[len-3] >= 0b11110000) { return len-3; } // 4-byte characters with only 3 bytes left - return len; -} - -// -// PERF NOTES: -// We pipe 2 inputs through these stages: -// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load -// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. 
-// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. -// The output of step 1 depends entirely on this information. These functions don't quite use -// up enough CPU: the second half of the functions is highly serial, only using 1 execution core -// at a time. The second input's scans has some dependency on the first ones finishing it, but -// they can make a lot of progress before they need that information. -// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that -// to finish: utf-8 checks and generating the output from the last iteration. -// -// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all -// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough -// workout. -// -template -error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { - if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } - // We guard the rest of the code so that we can assume that len > 0 throughout. - if (len == 0) { return EMPTY; } - if (is_streaming(partial)) { - len = trim_partial_utf8(buf, len); - // If you end up with an empty window after trimming - // the partial UTF-8 bytes, then chances are good that you - // have an UTF-8 formatting error. - if(len == 0) { return UTF8_ERROR; } - } - buf_block_reader reader(buf, len); - json_structural_indexer indexer(parser.structural_indexes.get()); - - // Read all but the last block - while (reader.has_full_block()) { - indexer.step(reader.full_block(), reader); - } - // Take care of the last block (will always be there unless file is empty which is - // not supposed to happen.) 
- uint8_t block[STEP_SIZE]; - if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } - indexer.step(block, reader); - return indexer.finish(parser, reader.block_index(), len, partial); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { - simd::simd8x64 in_1(block); - simd::simd8x64 in_2(block+64); - json_block block_1 = scanner.next(in_1); - json_block block_2 = scanner.next(in_2); - this->next(in_1, block_1, reader.block_index()); - this->next(in_2, block_2, reader.block_index()+64); - reader.advance(); -} - -template<> -simdjson_really_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { - simd::simd8x64 in_1(block); - json_block block_1 = scanner.next(in_1); - this->next(in_1, block_1, reader.block_index()); - reader.advance(); -} - -simdjson_really_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { - uint64_t unescaped = in.lteq(0x1F); - checker.check_next_input(in); - indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser - prev_structurals = block.structural_start(); - unescaped_chars_error |= block.non_quote_inside_string(unescaped); -} - -simdjson_really_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { - // Write out the final iteration's structurals - indexer.write(uint32_t(idx-64), prev_structurals); - error_code error = scanner.finish(); - // We deliberately break down the next expression so that it is - // human readable. - const bool should_we_exit = is_streaming(partial) ? 
- ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING - : (error != SUCCESS); // if partial is false, we must have SUCCESS - const bool have_unclosed_string = (error == UNCLOSED_STRING); - if (simdjson_unlikely(should_we_exit)) { return error; } - - if (unescaped_chars_error) { - return UNESCAPED_CHARS; - } - parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); - /*** - * The On Demand API requires special padding. - * - * This is related to https://github.com/simdjson/simdjson/issues/906 - * Basically, we want to make sure that if the parsing continues beyond the last (valid) - * structural character, it quickly stops. - * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. - * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing - * continues, then it must be [,] or }. - * Suppose it is ] or }. We backtrack to the first character, what could it be that would - * not trigger an error? It could be ] or } but no, because you can't start a document that way. - * It can't be a comma, a colon or any simple value. So the only way we could continue is - * if the repeated character is [. But if so, the document must start with [. But if the document - * starts with [, it should end with ]. If we enforce that rule, then we would get - * ][[ which is invalid. 
- * - * This is illustrated with the test array_iterate_unclosed_error() on the following input: - * R"({ "a": [,,)" - **/ - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final - parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); - parser.structural_indexes[parser.n_structural_indexes + 2] = 0; - parser.next_structural_index = 0; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - return EMPTY; - } - if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { - return UNEXPECTED_ERROR; - } - if (partial == stage1_mode::streaming_partial) { - // If we have an unclosed string, then the last structural - // will be the quote and we want to make sure to omit it. - if(have_unclosed_string) { - parser.n_structural_indexes--; - // a valid JSON file cannot have zero structural indexes - we should have found something - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } - } - // We truncate the input to the end of the last complete document (or zero). - auto new_structural_indexes = find_next_document_index(parser); - if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { - if(parser.structural_indexes[0] == 0) { - // If the buffer is partial and we started at index 0 but the document is - // incomplete, it's too big to parse. - return CAPACITY; - } else { - // It is possible that the document could be parsed, we just had a lot - // of white space. - parser.n_structural_indexes = 0; - return EMPTY; - } - } - - parser.n_structural_indexes = new_structural_indexes; - } else if (partial == stage1_mode::streaming_final) { - if(have_unclosed_string) { parser.n_structural_indexes--; } - // We truncate the input to the end of the last complete document (or zero). 
- // Because partial == stage1_mode::streaming_final, it means that we may - // silently ignore trailing garbage. Though it sounds bad, we do it - // deliberately because many people who have streams of JSON documents - // will truncate them for processing. E.g., imagine that you are uncompressing - // the data from a size file or receiving it in chunks from the network. You - // may not know where exactly the last document will be. Meanwhile the - // document_stream instances allow people to know the JSON documents they are - // parsing (see the iterator.source() method). - parser.n_structural_indexes = find_next_document_index(parser); - // We store the initial n_structural_indexes so that the client can see - // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, - // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, - // otherwise, it will copy some prior index. - parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; - // This next line is critical, do not change it unless you understand what you are - // doing. - parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); - if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { - // We tolerate an unclosed string at the very end of the stream. Indeed, users - // often load their data in bulk without being careful and they want us to ignore - // the trailing garbage. - return EMPTY; - } - } - checker.check_eof(); - return checker.errors(); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/json_structural_indexer.h */ -/* begin file src/generic/stage1/utf8_validator.h */ -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -/** - * Validates that the string is actual UTF-8. 
- */ -template -bool generic_validate_utf8(const uint8_t * input, size_t length) { - checker c{}; - buf_block_reader<64> reader(input, length); - while (reader.has_full_block()) { - simd::simd8x64 in(reader.full_block()); - c.check_next_input(in); - reader.advance(); - } - uint8_t block[64]{}; - reader.get_remainder(block); - simd::simd8x64 in(block); - c.check_next_input(in); - reader.advance(); - c.check_eof(); - return c.errors() == error_code::SUCCESS; -} - -bool generic_validate_utf8(const char * input, size_t length) { - return generic_validate_utf8(reinterpret_cast(input),length); -} - -} // namespace stage1 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage1/utf8_validator.h */ - -// -// Stage 2 -// -/* begin file src/generic/stage2/tape_builder.h */ -/* begin file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/logger.h */ -// This is for an internal-only stage 2 specific logger. -// Set LOG_ENABLED = true to log what stage 2 is doing! -namespace simdjson { -namespace westmere { -namespace { -namespace logger { - - static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - static constexpr const int LOG_EVENT_LEN = 20; - static constexpr const int LOG_BUFFER_LEN = 30; - static constexpr const int LOG_SMALL_BUFFER_LEN = 10; - static constexpr const int LOG_INDEX_LEN = 5; - - static int log_depth; // Not threadsafe. Log only. 
- - // Helper to turn unprintable or newline characters into spaces - static simdjson_really_inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } - } - - // Print the header and set up log_start - static simdjson_really_inline void log_start() { - if (LOG_ENABLED) { - log_depth = 0; - printf("\n"); - printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); - printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); - } - } - - simdjson_unused static simdjson_really_inline void log_string(const char *message) { - if (LOG_ENABLED) { - printf("%s\n", message); - } - } - - // Logs a single line from the stage 2 DOM parser - template - static simdjson_really_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { - if (LOG_ENABLED) { - printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); - auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; - auto next_index = structurals.next_structural; - auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); - auto next = &structurals.buf[*next_index]; - { - // Print the next N characters in the buffer. - printf("| "); - // Otherwise, print the characters starting from the buffer position. - // Print spaces for unprintable or newline characters. - for (int i=0;i - simdjson_warn_unused simdjson_really_inline error_code walk_document(V &visitor) noexcept; - - /** - * Create an iterator capable of walking a JSON document. - * - * The document must have already passed through stage 1. - */ - simdjson_really_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); - - /** - * Look at the next token. 
- * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *peek() const noexcept; - /** - * Advance to the next token. - * - * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). - * - * They may include invalid JSON as well (such as `1.2.3` or `ture`). - */ - simdjson_really_inline const uint8_t *advance() noexcept; - /** - * Get the remaining length of the document, from the start of the current token. - */ - simdjson_really_inline size_t remaining_len() const noexcept; - /** - * Check if we are at the end of the document. - * - * If this is true, there are no more tokens. - */ - simdjson_really_inline bool at_eof() const noexcept; - /** - * Check if we are at the beginning of the document. - */ - simdjson_really_inline bool at_beginning() const noexcept; - simdjson_really_inline uint8_t last_structural() const noexcept; - - /** - * Log that a value has been found. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_value(const char *type) const noexcept; - /** - * Log the start of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_start_value(const char *type) const noexcept; - /** - * Log the end of a multipart value. - * - * Set LOG_ENABLED=true in logger.h to see logging. - */ - simdjson_really_inline void log_end_value(const char *type) const noexcept; - /** - * Log an error. - * - * Set LOG_ENABLED=true in logger.h to see logging. 
- */ - simdjson_really_inline void log_error(const char *error) const noexcept; - - template - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; -}; - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::walk_document(V &visitor) noexcept { - logger::log_start(); - - // - // Start the document - // - if (at_eof()) { return EMPTY; } - log_start_value("document"); - SIMDJSON_TRY( visitor.visit_document_start(*this) ); - - // - // Read first value - // - { - auto value = advance(); - - // Make sure the outer object or array is closed before continuing; otherwise, there are ways we - // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 - if (!STREAMING) { - switch (*value) { - case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; - case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; - } - } - - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; - } - } - goto document_end; - -// -// Object parser states -// -object_begin: - log_start_value("object"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = false; - SIMDJSON_TRY( visitor.visit_object_start(*this) ); - - { - auto key = advance(); - if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; 
} - SIMDJSON_TRY( visitor.increment_count(*this) ); - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - -object_field: - if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -object_continue: - switch (*advance()) { - case ',': - SIMDJSON_TRY( visitor.increment_count(*this) ); - { - auto key = advance(); - if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } - SIMDJSON_TRY( visitor.visit_key(*this, key) ); - } - goto object_field; - case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; - default: log_error("No comma between object fields"); return TAPE_ERROR; - } - -scope_end: - depth--; - if (depth == 0) { goto document_end; } - if (dom_parser.is_array[depth]) { goto array_continue; } - goto object_continue; - -// -// Array parser states -// -array_begin: - log_start_value("array"); - depth++; - if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } - dom_parser.is_array[depth] = true; - SIMDJSON_TRY( visitor.visit_array_start(*this) ); - SIMDJSON_TRY( visitor.increment_count(*this) ); - -array_value: - { - auto value = advance(); - switch (*value) { - case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; - case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); 
break; } goto array_begin; - default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; - } - } - -array_continue: - switch (*advance()) { - case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; - case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; - default: log_error("Missing comma between array values"); return TAPE_ERROR; - } - -document_end: - log_end_value("document"); - SIMDJSON_TRY( visitor.visit_document_end(*this) ); - - dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); - - // If we didn't make it to the end, it's an error - if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { - log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); - return TAPE_ERROR; - } - - return SUCCESS; - -} // walk_document() - -simdjson_really_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) - : buf{_dom_parser.buf}, - next_structural{&_dom_parser.structural_indexes[start_structural_index]}, - dom_parser{_dom_parser} { -} - -simdjson_really_inline const uint8_t *json_iterator::peek() const noexcept { - return &buf[*(next_structural)]; -} -simdjson_really_inline const uint8_t *json_iterator::advance() noexcept { - return &buf[*(next_structural++)]; -} -simdjson_really_inline size_t json_iterator::remaining_len() const noexcept { - return dom_parser.len - *(next_structural-1); -} - -simdjson_really_inline bool json_iterator::at_eof() const noexcept { - return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; -} -simdjson_really_inline bool json_iterator::at_beginning() const noexcept { - return next_structural == dom_parser.structural_indexes.get(); -} -simdjson_really_inline uint8_t json_iterator::last_structural() const noexcept { - return 
buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; -} - -simdjson_really_inline void json_iterator::log_value(const char *type) const noexcept { - logger::log_line(*this, "", type, ""); -} - -simdjson_really_inline void json_iterator::log_start_value(const char *type) const noexcept { - logger::log_line(*this, "+", type, ""); - if (logger::LOG_ENABLED) { logger::log_depth++; } -} - -simdjson_really_inline void json_iterator::log_end_value(const char *type) const noexcept { - if (logger::LOG_ENABLED) { logger::log_depth--; } - logger::log_line(*this, "-", type, ""); -} - -simdjson_really_inline void json_iterator::log_error(const char *error) const noexcept { - logger::log_line(*this, "", "ERROR", error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_root_string(*this, value); - case 't': return visitor.visit_root_true_atom(*this, value); - case 'f': return visitor.visit_root_false_atom(*this, value); - case 'n': return visitor.visit_root_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_root_number(*this, value); - default: - log_error("Document starts with a non-value character"); - return TAPE_ERROR; - } -} -template -simdjson_warn_unused simdjson_really_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { - switch (*value) { - case '"': return visitor.visit_string(*this, value); - case 't': return visitor.visit_true_atom(*this, value); - case 'f': return visitor.visit_false_atom(*this, value); - case 'n': return visitor.visit_null_atom(*this, value); - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return visitor.visit_number(*this, value); - default: - 
log_error("Non-value found when value was expected!"); - return TAPE_ERROR; - } -} - -} // namespace stage2 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage2/json_iterator.h */ -/* begin file src/generic/stage2/tape_writer.h */ -namespace simdjson { -namespace westmere { -namespace { -namespace stage2 { - -struct tape_writer { - /** The next place to write to tape */ - uint64_t *next_tape_loc; - - /** Write a signed 64-bit value to tape. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - - /** Write an unsigned 64-bit value to tape. */ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - - /** Write a double value to tape. */ - simdjson_really_inline void append_double(double value) noexcept; - - /** - * Append a tape entry (an 8-bit type,and 56 bits worth of value). - */ - simdjson_really_inline void append(uint64_t val, internal::tape_type t) noexcept; - - /** - * Skip the current tape entry without writing. - * - * Used to skip the start of the container, since we'll come back later to fill it in when the - * container ends. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Skip the number of tape entries necessary to write a large u64 or i64. - */ - simdjson_really_inline void skip_large_integer() noexcept; - - /** - * Skip the number of tape entries necessary to write a double. - */ - simdjson_really_inline void skip_double() noexcept; - - /** - * Write a value to a known location on tape. - * - * Used to go back and write out the start of a container after the container ends. - */ - simdjson_really_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; - -private: - /** - * Append both the tape entry, and a supplementary value following it. Used for types that need - * all 64 bits, such as double and uint64_t. 
- */ - template - simdjson_really_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; -}; // struct number_writer - -simdjson_really_inline void tape_writer::append_s64(int64_t value) noexcept { - append2(0, value, internal::tape_type::INT64); -} - -simdjson_really_inline void tape_writer::append_u64(uint64_t value) noexcept { - append(0, internal::tape_type::UINT64); - *next_tape_loc = value; - next_tape_loc++; -} - -/** Write a double value to tape. */ -simdjson_really_inline void tape_writer::append_double(double value) noexcept { - append2(0, value, internal::tape_type::DOUBLE); -} - -simdjson_really_inline void tape_writer::skip() noexcept { - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::skip_large_integer() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::skip_double() noexcept { - next_tape_loc += 2; -} - -simdjson_really_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { - *next_tape_loc = val | ((uint64_t(char(t))) << 56); - next_tape_loc++; -} - -template -simdjson_really_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { - append(val, t); - static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); - memcpy(next_tape_loc, &val2, sizeof(val2)); - next_tape_loc++; -} - -simdjson_really_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { - tape_loc = val | ((uint64_t(char(t))) << 56); -} - -} // namespace stage2 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage2/tape_writer.h */ - -namespace simdjson { -namespace westmere { -namespace { -namespace stage2 { - -struct tape_builder { - template - simdjson_warn_unused static simdjson_really_inline error_code parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept; - - /** Called when a non-empty document starts. 
*/ - simdjson_warn_unused simdjson_really_inline error_code visit_document_start(json_iterator &iter) noexcept; - /** Called when a non-empty document ends without error. */ - simdjson_warn_unused simdjson_really_inline error_code visit_document_end(json_iterator &iter) noexcept; - - /** Called when a non-empty array starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_start(json_iterator &iter) noexcept; - /** Called when a non-empty array ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_array_end(json_iterator &iter) noexcept; - /** Called when an empty array is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_array(json_iterator &iter) noexcept; - - /** Called when a non-empty object starts. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_start(json_iterator &iter) noexcept; - /** - * Called when a key in a field is encountered. - * - * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array - * will be called after this with the field value. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; - /** Called when a non-empty object ends. */ - simdjson_warn_unused simdjson_really_inline error_code visit_object_end(json_iterator &iter) noexcept; - /** Called when an empty object is found. */ - simdjson_warn_unused simdjson_really_inline error_code visit_empty_object(json_iterator &iter) noexcept; - - /** - * Called when a string, number, boolean or null is found. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; - /** - * Called when a string, number, boolean or null is found at the top level of a document (i.e. - * when there is no array or object and the entire document is a single string, number, boolean or - * null. 
- * - * This is separate from primitive() because simdjson's normal primitive parsing routines assume - * there is at least one more token after the value, which is only true in an array or object. - */ - simdjson_warn_unused simdjson_really_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; - simdjson_warn_unused simdjson_really_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; - - /** Called each time a new field or element in an array or object is found. 
*/ - simdjson_warn_unused simdjson_really_inline error_code increment_count(json_iterator &iter) noexcept; - - /** Next location to write to tape */ - tape_writer tape; -private: - /** Next write location in the string buf for stage 2 parsing */ - uint8_t *current_string_buf_loc; - - simdjson_really_inline tape_builder(dom::document &doc) noexcept; - - simdjson_really_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; - simdjson_really_inline void start_container(json_iterator &iter) noexcept; - simdjson_warn_unused simdjson_really_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_warn_unused simdjson_really_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; - simdjson_really_inline uint8_t *on_start_string(json_iterator &iter) noexcept; - simdjson_really_inline void on_end_string(uint8_t *dst) noexcept; -}; // class tape_builder - -template -simdjson_warn_unused simdjson_really_inline error_code tape_builder::parse_document( - dom_parser_implementation &dom_parser, - dom::document &doc) noexcept { - dom_parser.doc = &doc; - json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); - tape_builder builder(doc); - return iter.walk_document(builder); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_root_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { - return iter.visit_primitive(*this, value); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { - return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { - start_container(iter); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { - return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_document_end(json_iterator &iter) 
noexcept { - constexpr uint32_t start_tape_index = 0; - tape.append(start_tape_index, internal::tape_type::ROOT); - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); - return SUCCESS; -} -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { - return visit_string(iter, key, true); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 - return SUCCESS; -} - -simdjson_really_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { - iter.log_value(key ? "key" : "string"); - uint8_t *dst = on_start_string(iter); - dst = stringparsing::parse_string(value+1, dst); - if (dst == nullptr) { - iter.log_error("Invalid escape in string"); - return STRING_ERROR; - } - on_end_string(dst); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { - return visit_string(iter, value); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("number"); - return numberparsing::parse_number(value, tape); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { - // - // We need to make a copy to make sure that the string is space terminated. 
- // This is not about padding the input, which should already padded up - // to len + SIMDJSON_PADDING. However, we have no control at this stage - // on how the padding was done. What if the input string was padded with nulls? - // It is quite common for an input string to have an extra null character (C string). - // We do not want to allow 9\0 (where \0 is the null character) inside a JSON - // document, but the string "9\0" by itself is fine. So we make a copy and - // pad the input with spaces when we know that there is just one input element. - // This copy is relatively expensive, but it will almost never be called in - // practice unless you are in the strange scenario where you have many JSON - // documents made of single atoms. - // - std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); - if (copy.get() == nullptr) { return MEMALLOC; } - std::memcpy(copy.get(), value, iter.remaining_len()); - std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); - error_code error = visit_number(iter, copy.get()); - return error; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("true"); - if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } - tape.append(0, internal::tape_type::TRUE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } - 
tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("false"); - if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } - tape.append(0, internal::tape_type::FALSE_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { - iter.log_value("null"); - if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } - tape.append(0, internal::tape_type::NULL_VALUE); - return SUCCESS; -} - -// private: - -simdjson_really_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { - return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); -} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - auto start_index = next_tape_index(iter); - tape.append(start_index+2, start); - tape.append(start_index, end); - return SUCCESS; -} - -simdjson_really_inline void tape_builder::start_container(json_iterator &iter) noexcept { - iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); - iter.dom_parser.open_containers[iter.depth].count = 0; - tape.skip(); // We don't actually *write* the start element until the end. 
-} - -simdjson_warn_unused simdjson_really_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { - // Write the ending tape element, pointing at the start location - const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; - tape.append(start_tape_index, end); - // Write the start tape element, pointing at the end location (and including count) - // count can overflow if it exceeds 24 bits... so we saturate - // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). - const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; - const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; - tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); - return SUCCESS; -} - -simdjson_really_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { - // we advance the point, accounting for the fact that we have a NULL termination - tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); - return current_string_buf_loc + sizeof(uint32_t); -} - -simdjson_really_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { - uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); - // TODO check for overflow in case someone has a crazy string (>=4GB?) - // But only add the overflow check when the document itself exceeds 4GB - // Currently unneeded because we refuse to parse docs larger or equal to 4GB. - memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); - // NULL termination is still handy if you expect all your strings to - // be NULL terminated? 
It comes at a small cost - *dst = 0; - current_string_buf_loc = dst + 1; -} - -} // namespace stage2 -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file src/generic/stage2/tape_builder.h */ - -// -// Implementation-specific overrides -// - -namespace simdjson { -namespace westmere { -namespace { -namespace stage1 { - -simdjson_really_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { - if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } - return find_escaped_branchless(backslash); -} - -} // namespace stage1 -} // unnamed namespace - -simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { - return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { - this->buf = _buf; - this->len = _len; - return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); -} - -simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { - return westmere::stage1::generic_validate_utf8(buf,len); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { - return stage2::tape_builder::parse_document(*this, _doc); -} - -simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { - auto error = stage1(_buf, _len, stage1_mode::regular); - if (error) { return error; } - return stage2(_doc); -} - -} // namespace westmere -} // namespace simdjson - -/* begin file include/simdjson/westmere/end.h */ 
-SIMDJSON_UNTARGET_WESTMERE -/* end file include/simdjson/westmere/end.h */ -/* end file src/westmere/dom_parser_implementation.cpp */ -#endif - -SIMDJSON_POP_DISABLE_WARNINGS -/* end file src/simdjson.cpp */ diff --git a/simdjson/simdjson_source/simdjson.h b/simdjson/simdjson_source/simdjson.h deleted file mode 100644 index 67cb53b..0000000 --- a/simdjson/simdjson_source/simdjson.h +++ /dev/null @@ -1,32135 +0,0 @@ -/* auto-generated on 2022-06-02 13:56:20 -0400. Do not edit! */ -/* begin file include/simdjson.h */ -#ifndef SIMDJSON_H -#define SIMDJSON_H - -/** - * @mainpage - * - * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). - * - * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. - - #include "simdjson.h" - - int main(void) { - // load from `twitter.json` file: - simdjson::dom::parser parser; - simdjson::dom::element tweets = parser.load("twitter.json"); - std::cout << tweets["search_metadata"]["count"] << " results." 
<< std::endl; - - // Parse and iterate through an array of objects - auto abstract_json = R"( [ - { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, - { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } - ] )"_padded; - - for (simdjson::dom::object obj : parser.parse(abstract_json)) { - for(const auto key_value : obj) { - cout << "key: " << key_value.key << " : "; - simdjson::dom::object innerobj = key_value.value; - cout << "a: " << double(innerobj["a"]) << ", "; - cout << "b: " << double(innerobj["b"]) << ", "; - cout << "c: " << int64_t(innerobj["c"]) << endl; - } - } - } - */ - -/* begin file include/simdjson/simdjson_version.h */ -// /include/simdjson/simdjson_version.h automatically generated by release.py, -// do not change by hand -#ifndef SIMDJSON_SIMDJSON_VERSION_H -#define SIMDJSON_SIMDJSON_VERSION_H - -/** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION 2.0.3 - -namespace simdjson { -enum { - /** - * The major version (MAJOR.minor.revision) of simdjson being used. - */ - SIMDJSON_VERSION_MAJOR = 2, - /** - * The minor version (major.MINOR.revision) of simdjson being used. - */ - SIMDJSON_VERSION_MINOR = 0, - /** - * The revision (major.minor.REVISION) of simdjson being used. - */ - SIMDJSON_VERSION_REVISION = 3 -}; -} // namespace simdjson - -#endif // SIMDJSON_SIMDJSON_VERSION_H -/* end file include/simdjson/simdjson_version.h */ -/* begin file include/simdjson/dom.h */ -#ifndef SIMDJSON_DOM_H -#define SIMDJSON_DOM_H - -/* begin file include/simdjson/base.h */ -#ifndef SIMDJSON_BASE_H -#define SIMDJSON_BASE_H - -/* begin file include/simdjson/compiler_check.h */ -#ifndef SIMDJSON_COMPILER_CHECK_H -#define SIMDJSON_COMPILER_CHECK_H - -#ifndef __cplusplus -#error simdjson requires a C++ compiler -#endif - -#ifndef SIMDJSON_CPLUSPLUS -#if defined(_MSVC_LANG) && !defined(__clang__) -#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) -#else -#define SIMDJSON_CPLUSPLUS __cplusplus -#endif -#endif - -// C++ 17 -#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) -#define SIMDJSON_CPLUSPLUS17 1 -#endif - -// C++ 14 -#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) -#define SIMDJSON_CPLUSPLUS14 1 -#endif - -// C++ 11 -#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) -#define SIMDJSON_CPLUSPLUS11 1 -#endif - -#ifndef SIMDJSON_CPLUSPLUS11 -#error simdjson requires a compiler compliant with the C++11 standard -#endif - -#endif // SIMDJSON_COMPILER_CHECK_H -/* end file include/simdjson/compiler_check.h */ -/* begin file include/simdjson/common_defs.h */ -#ifndef SIMDJSON_COMMON_DEFS_H -#define SIMDJSON_COMMON_DEFS_H - -#include -/* begin file include/simdjson/portability.h */ -#ifndef SIMDJSON_PORTABILITY_H -#define SIMDJSON_PORTABILITY_H - -#include -#include -#include -#include -#include -#ifndef _WIN32 -// strcasecmp, strncasecmp -#include -#endif - -#ifdef _MSC_VER -#define SIMDJSON_VISUAL_STUDIO 1 -/** - * We want to differentiate carefully between - * clang under visual studio and regular visual - * studio. - * - * Under clang for Windows, we enable: - * * target pragmas so that part and only part of the - * code gets compiled for advanced instructions. - * - */ -#ifdef __clang__ -// clang under visual studio -#define SIMDJSON_CLANG_VISUAL_STUDIO 1 -#else -// just regular visual studio (best guess) -#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 -#endif // __clang__ -#endif // _MSC_VER - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -// https://en.wikipedia.org/wiki/C_alternative_tokens -// This header should have no effect, except maybe -// under Visual Studio. 
-#include -#endif - -#if defined(__x86_64__) || defined(_M_AMD64) -#define SIMDJSON_IS_X86_64 1 -#elif defined(__aarch64__) || defined(_M_ARM64) -#define SIMDJSON_IS_ARM64 1 -#elif defined(__PPC64__) || defined(_M_PPC64) -#define SIMDJSON_IS_PPC64 1 -#else -#define SIMDJSON_IS_32BITS 1 - -// We do not support 32-bit platforms, but it can be -// handy to identify them. -#if defined(_M_IX86) || defined(__i386__) -#define SIMDJSON_IS_X86_32BITS 1 -#elif defined(__arm__) || defined(_M_ARM) -#define SIMDJSON_IS_ARM_32BITS 1 -#elif defined(__PPC__) || defined(_M_PPC) -#define SIMDJSON_IS_PPC_32BITS 1 -#endif - -#endif // defined(__x86_64__) || defined(_M_AMD64) - -#ifdef SIMDJSON_IS_32BITS -#ifndef SIMDJSON_NO_PORTABILITY_WARNING -#pragma message("The simdjson library is designed \ -for 64-bit processors and it seems that you are not \ -compiling for a known 64-bit platform. All fast kernels \ -will be disabled and performance may be poor. Please \ -use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") -#endif // SIMDJSON_NO_PORTABILITY_WARNING -#endif // SIMDJSON_IS_32BITS - -// this is almost standard? -#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ -#undef SIMDJSON_STRINGIFY -#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a -#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) - -// Our fast kernels require 64-bit systems. -// -// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. -// Furthermore, the number of SIMD registers is reduced. -// -// On 32-bit ARM, we would have smaller registers. -// -// The simdjson users should still have the fallback kernel. It is -// slower, but it should run everywhere. - -// -// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION -// - -// We are going to use runtime dispatch. 
-#ifdef SIMDJSON_IS_X86_64 -#ifdef __clang__ -// clang does not have GCC push pop -// warning: clang attribute push can't be used within a namespace in clang up -// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a -// namespace. -#define SIMDJSON_TARGET_REGION(T) \ - _Pragma(SIMDJSON_STRINGIFY( \ - clang attribute push(__attribute__((target(T))), apply_to = function))) -#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") -#elif defined(__GNUC__) -// GCC is easier -#define SIMDJSON_TARGET_REGION(T) \ - _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) -#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") -#endif // clang then gcc - -#endif // x86 - -// Default target region macros don't do anything. -#ifndef SIMDJSON_TARGET_REGION -#define SIMDJSON_TARGET_REGION(T) -#define SIMDJSON_UNTARGET_REGION -#endif - -// Is threading enabled? -#if defined(_REENTRANT) || defined(_MT) -#ifndef SIMDJSON_THREADS_ENABLED -#define SIMDJSON_THREADS_ENABLED -#endif -#endif - -// workaround for large stack sizes under -O0. -// https://github.com/simdjson/simdjson/issues/691 -#ifdef __APPLE__ -#ifndef __OPTIMIZE__ -// Apple systems have small stack sizes in secondary threads. -// Lack of compiler optimization may generate high stack usage. -// Users may want to disable threads for safety, but only when -// in debug mode which we detect by the fact that the __OPTIMIZE__ -// macro is not defined. -#undef SIMDJSON_THREADS_ENABLED -#endif -#endif - - -#if defined(__clang__) -#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) -#elif defined(__GNUC__) -#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) -#else -#define SIMDJSON_NO_SANITIZE_UNDEFINED -#endif - -#ifdef SIMDJSON_VISUAL_STUDIO -// This is one case where we do not distinguish between -// regular visual studio and clang under visual studio. 
-// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) -#define simdjson_strcasecmp _stricmp -#define simdjson_strncasecmp _strnicmp -#else -// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). -// So they are only useful for ASCII in our context. -// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings -#define simdjson_strcasecmp strcasecmp -#define simdjson_strncasecmp strncasecmp -#endif - -#ifdef NDEBUG - -#ifdef SIMDJSON_VISUAL_STUDIO -#define SIMDJSON_UNREACHABLE() __assume(0) -#define SIMDJSON_ASSUME(COND) __assume(COND) -#else -#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); -#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) -#endif - -#else // NDEBUG - -#define SIMDJSON_UNREACHABLE() assert(0); -#define SIMDJSON_ASSUME(COND) assert(COND) - -#endif - -#endif // SIMDJSON_PORTABILITY_H -/* end file include/simdjson/portability.h */ - -namespace simdjson { - -namespace internal { -/** - * @private - * Our own implementation of the C++17 to_chars function. - * Defined in src/to_chars - */ -char *to_chars(char *first, const char *last, double value); -/** - * @private - * A number parsing routine. - * Defined in src/from_chars - */ -double from_chars(const char *first) noexcept; -double from_chars(const char *first, const char* end) noexcept; - -} - -#ifndef SIMDJSON_EXCEPTIONS -#if __cpp_exceptions -#define SIMDJSON_EXCEPTIONS 1 -#else -#define SIMDJSON_EXCEPTIONS 0 -#endif -#endif - -/** The maximum document size supported by simdjson. */ -constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; - -/** - * The amount of padding needed in a buffer to parse JSON. 
- * - * the input buf should be readable up to buf + SIMDJSON_PADDING - * this is a stopgap; there should be a better description of the - * main loop and its behavior that abstracts over this - * See https://github.com/simdjson/simdjson/issues/174 - */ -constexpr size_t SIMDJSON_PADDING = 32; - -/** - * By default, simdjson supports this many nested objects and arrays. - * - * This is the default for parser::max_depth(). - */ -constexpr size_t DEFAULT_MAX_DEPTH = 1024; - -} // namespace simdjson - -#if defined(__GNUC__) - // Marks a block with a name so that MCA analysis can see it. - #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); - #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); - #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); -#else - #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) - #define SIMDJSON_END_DEBUG_BLOCK(name) - #define SIMDJSON_DEBUG_BLOCK(name, block) -#endif - -// Align to N-byte boundary -#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) -#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) - -#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) - -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) - - #define simdjson_really_inline __forceinline - #define simdjson_never_inline __declspec(noinline) - - #define simdjson_unused - #define simdjson_warn_unused - - #ifndef simdjson_likely - #define simdjson_likely(x) x - #endif - #ifndef simdjson_unlikely - #define simdjson_unlikely(x) x - #endif - - #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) - #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) - #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) - // Get rid of Intellisense-only warnings (Code Analysis) - // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
- #ifdef __has_include - #if __has_include() - #include - #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) - #endif - #endif - - #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS - #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS - #endif - - #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) - #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING - #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) - -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - - #define simdjson_really_inline inline __attribute__((always_inline)) - #define simdjson_never_inline inline __attribute__((noinline)) - - #define simdjson_unused __attribute__((unused)) - #define simdjson_warn_unused __attribute__((warn_unused_result)) - - #ifndef simdjson_likely - #define simdjson_likely(x) __builtin_expect(!!(x), 1) - #endif - #ifndef simdjson_unlikely - #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) - #endif - - #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") - // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary - // We do it separately for clang since it has different warnings. - #ifdef __clang__ - // clang is missing -Wmaybe-uninitialized. 
- #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ - SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) - #else // __clang__ - #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ - SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ - SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) - #endif // __clang__ - - #define SIMDJSON_PRAGMA(P) _Pragma(#P) - #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) - #if defined(SIMDJSON_CLANG_VISUAL_STUDIO) - #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) - #else - #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS - #endif - #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) - #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) - #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") - - - -#endif // MSC_VER - -#if defined(SIMDJSON_VISUAL_STUDIO) - /** - * Windows 
users need to do some extra work when building - * or using a dynamic library (DLL). When building, we need - * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). - * When *using* the DLL, the user needs to set - * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). - * - * Static libraries not need require such work. - * - * It does not matter here whether you are using - * the regular visual studio or clang under visual - * studio, you still need to handle these issues. - * - * Non-Windows systems do not have this complexity. - */ - #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY - // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. - // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and - // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. - #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) - #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY - // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. - #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) - #else - // We assume by default static linkage - #define SIMDJSON_DLLIMPORTEXPORT - #endif - -/** - * Workaround for the vcpkg package manager. Only vcpkg should - * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. - */ -#if SIMDJSON_USING_LIBRARY -#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) -#endif -/** - * End of workaround for the vcpkg package manager. - */ -#else - #define SIMDJSON_DLLIMPORTEXPORT -#endif - -// C++17 requires string_view. -#if SIMDJSON_CPLUSPLUS17 -#define SIMDJSON_HAS_STRING_VIEW -#include // by the standard, this has to be safe. -#endif - -// This macro (__cpp_lib_string_view) has to be defined -// for C++17 and better, but if it is otherwise defined, -// we are going to assume that string_view is available -// even if we do not have C++17 support. 
-#ifdef __cpp_lib_string_view -#define SIMDJSON_HAS_STRING_VIEW -#endif - -// Some systems have string_view even if we do not have C++17 support, -// and even if __cpp_lib_string_view is undefined, it is the case -// with Apple clang version 11. -// We must handle it. *This is important.* -#ifndef SIMDJSON_HAS_STRING_VIEW -#if defined __has_include -// do not combine the next #if with the previous one (unsafe) -#if __has_include () -// now it is safe to trigger the include -#include // though the file is there, it does not follow that we got the implementation -#if defined(_LIBCPP_STRING_VIEW) -// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, -// included string_view. -// This means that we have string_view *even though* we may not have C++17. -#define SIMDJSON_HAS_STRING_VIEW -#endif // _LIBCPP_STRING_VIEW -#endif // __has_include () -#endif // defined __has_include -#endif // def SIMDJSON_HAS_STRING_VIEW -// end of complicated but important routine to try to detect string_view. - -// -// Backfill std::string_view using nonstd::string_view on systems where -// we expect that string_view is missing. Important: if we get this wrong, -// we will end up with two string_view definitions and potential trouble. -// That is why we work so hard above to avoid it. -// -#ifndef SIMDJSON_HAS_STRING_VIEW -SIMDJSON_PUSH_DISABLE_ALL_WARNINGS -/* begin file include/simdjson/nonstd/string_view.hpp */ -// Copyright 2017-2020 by Martin Moene -// -// string-view lite, a C++17-like string_view for C++98 and later. -// For more information see https://github.com/martinmoene/string-view-lite -// -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once - -#ifndef NONSTD_SV_LITE_H_INCLUDED -#define NONSTD_SV_LITE_H_INCLUDED - -#define string_view_lite_MAJOR 1 -#define string_view_lite_MINOR 6 -#define string_view_lite_PATCH 0 - -#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) - -#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) -#define nssv_STRINGIFY_( x ) #x - -// string-view lite configuration: - -#define nssv_STRING_VIEW_DEFAULT 0 -#define nssv_STRING_VIEW_NONSTD 1 -#define nssv_STRING_VIEW_STD 2 - -// tweak header support: - -#ifdef __has_include -# if __has_include() -# include -# endif -#define nssv_HAVE_TWEAK_HEADER 1 -#else -#define nssv_HAVE_TWEAK_HEADER 0 -//# pragma message("string_view.hpp: Note: Tweak header not supported.") -#endif - -// string_view selection and configuration: - -#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) -# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? 
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) -#endif - -#ifndef nssv_CONFIG_STD_SV_OPERATOR -# define nssv_CONFIG_STD_SV_OPERATOR 0 -#endif - -#ifndef nssv_CONFIG_USR_SV_OPERATOR -# define nssv_CONFIG_USR_SV_OPERATOR 1 -#endif - -#ifdef nssv_CONFIG_CONVERSION_STD_STRING -# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING -# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING -#endif - -#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS -# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 -#endif - -#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS -# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 -#endif - -#ifndef nssv_CONFIG_NO_STREAM_INSERTION -# define nssv_CONFIG_NO_STREAM_INSERTION 0 -#endif - -// Control presence of exception handling (try and auto discover): - -#ifndef nssv_CONFIG_NO_EXCEPTIONS -# if _MSC_VER -# include // for _HAS_EXCEPTIONS -# endif -# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) -# define nssv_CONFIG_NO_EXCEPTIONS 0 -# else -# define nssv_CONFIG_NO_EXCEPTIONS 1 -# endif -#endif - -// C++ language version detection (C++20 is speculative): -// Note: VC14.0/1900 (VS2015) lacks too much from C++14. - -#ifndef nssv_CPLUSPLUS -# if defined(_MSVC_LANG ) && !defined(__clang__) -# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) -# else -# define nssv_CPLUSPLUS __cplusplus -# endif -#endif - -#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) -#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) -#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) -#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) -#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) -#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) - -// use C++17 std::string_view if available and requested: - -#if nssv_CPP17_OR_GREATER && defined(__has_include ) -# if __has_include( ) -# define nssv_HAVE_STD_STRING_VIEW 1 -# else -# define nssv_HAVE_STD_STRING_VIEW 0 -# endif -#else -# define nssv_HAVE_STD_STRING_VIEW 0 -#endif - -#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) - -#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) -#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH - -// -// Use C++17 std::string_view: -// - -#if nssv_USES_STD_STRING_VIEW - -#include - -// Extensions for std::string: - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { - -template< class CharT, class Traits, class Allocator = std::allocator > -std::basic_string -to_string( std::basic_string_view v, Allocator const & a = Allocator() ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -template< class CharT, class Traits, class Allocator > -std::basic_string_view -to_string_view( std::basic_string const & s ) -{ - return std::basic_string_view( s.data(), s.size() ); -} - -// Literal operators sv and _sv: - -#if nssv_CONFIG_STD_SV_OPERATOR - -using namespace std::literals::string_view_literals; - -#endif - -#if nssv_CONFIG_USR_SV_OPERATOR - -inline namespace literals { -inline namespace string_view_literals { - - -constexpr std::string_view operator "" _sv( const char* str, 
size_t len ) noexcept // (1) -{ - return std::string_view{ str, len }; -} - -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) -{ - return std::u16string_view{ str, len }; -} - -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) -{ - return std::u32string_view{ str, len }; -} - -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) -{ - return std::wstring_view{ str, len }; -} - -}} // namespace literals::string_view_literals - -#endif // nssv_CONFIG_USR_SV_OPERATOR - -} // namespace nonstd - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { - -using std::string_view; -using std::wstring_view; -using std::u16string_view; -using std::u32string_view; -using std::basic_string_view; - -// literal "sv" and "_sv", see above - -using std::operator==; -using std::operator!=; -using std::operator<; -using std::operator<=; -using std::operator>; -using std::operator>=; - -using std::operator<<; - -} // namespace nonstd - -#else // nssv_HAVE_STD_STRING_VIEW - -// -// Before C++17: use string_view lite: -// - -// Compiler versions: -// -// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) -// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) -// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) -// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) -// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) -// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) -// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) -// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) -// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) -// 
MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) -// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) - -#if defined(_MSC_VER ) && !defined(__clang__) -# define nssv_COMPILER_MSVC_VER (_MSC_VER ) -# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) -#else -# define nssv_COMPILER_MSVC_VER 0 -# define nssv_COMPILER_MSVC_VERSION 0 -#endif - -#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) - -#if defined( __apple_build_version__ ) -# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) -# define nssv_COMPILER_CLANG_VERSION 0 -#elif defined( __clang__ ) -# define nssv_COMPILER_APPLECLANG_VERSION 0 -# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) -#else -# define nssv_COMPILER_APPLECLANG_VERSION 0 -# define nssv_COMPILER_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) -# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#else -# define nssv_COMPILER_GNUC_VERSION 0 -#endif - -// half-open range [lo..hi): -#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) - -// Presence of language and library features: - -#ifdef _HAS_CPP0X -# define nssv_HAS_CPP0X _HAS_CPP0X -#else -# define nssv_HAS_CPP0X 0 -#endif - -// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: - -#if nssv_COMPILER_MSVC_VER >= 1900 -# undef nssv_CPP11_OR_GREATER -# define nssv_CPP11_OR_GREATER 1 -#endif - -#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) -#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) -#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) -#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || 
nssv_COMPILER_MSVC_VER >= 1800) -#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) -#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) - -#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) -#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) - -// Presence of C++11 language features: - -#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 -#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 -#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 -#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 -#define nssv_HAVE_NULLPTR nssv_CPP11_100 -#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 -#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 -#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 -#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 -#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 - -#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) -# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 -#else -# define nssv_HAVE_STD_DEFINED_LITERALS 0 -#endif - -// Presence of C++14 language features: - -#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 - -// Presence of C++17 language features: - -#define nssv_HAVE_NODISCARD nssv_CPP17_000 - -// Presence of C++ library features: - -#define nssv_HAVE_STD_HASH nssv_CPP11_120 - -// Presence of compiler intrinsics: - -// Providing char-type specializations for compare() and length() that -// use compiler intrinsics can improve compile- and run-time performance. -// -// The challenge is in using the right combinations of builtin availability -// and its constexpr-ness. -// -// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | -// |----------|------------------------------|---------------------| -// | clang | 4.0 (>= 4.0 ) | any (? ) | -// | clang-a | 9.0 (>= 9.0 ) | any (? ) | -// | gcc | any (constexpr) | any (? ) | -// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? 
) | - -#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) -#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) - -#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) -#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) - -#ifdef __has_builtin -# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) -#else -# define nssv_HAVE_BUILTIN( x ) 0 -#endif - -#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER -# define nssv_BUILTIN_MEMCMP __builtin_memcmp -#else -# define nssv_BUILTIN_MEMCMP memcmp -#endif - -#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER -# define nssv_BUILTIN_STRLEN __builtin_strlen -#else -# define nssv_BUILTIN_STRLEN strlen -#endif - -// C++ feature usage: - -#if nssv_HAVE_CONSTEXPR_11 -# define nssv_constexpr constexpr -#else -# define nssv_constexpr /*constexpr*/ -#endif - -#if nssv_HAVE_CONSTEXPR_14 -# define nssv_constexpr14 constexpr -#else -# define nssv_constexpr14 /*constexpr*/ -#endif - -#if nssv_HAVE_EXPLICIT_CONVERSION -# define nssv_explicit explicit -#else -# define nssv_explicit /*explicit*/ -#endif - -#if nssv_HAVE_INLINE_NAMESPACE -# define nssv_inline_ns inline -#else -# define nssv_inline_ns /*inline*/ -#endif - -#if nssv_HAVE_NOEXCEPT -# define nssv_noexcept noexcept -#else -# define nssv_noexcept /*noexcept*/ -#endif - -//#if nssv_HAVE_REF_QUALIFIER -//# define nssv_ref_qual & -//# define nssv_refref_qual && -//#else -//# define nssv_ref_qual /*&*/ -//# define nssv_refref_qual /*&&*/ -//#endif - -#if nssv_HAVE_NULLPTR -# define nssv_nullptr nullptr -#else -# define nssv_nullptr NULL -#endif - -#if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] -#else -# define nssv_nodiscard /*[[nodiscard]]*/ -#endif - -// Additional 
includes: - -#include -#include -#include -#include -#include // std::char_traits<> - -#if ! nssv_CONFIG_NO_STREAM_INSERTION -# include -#endif - -#if ! nssv_CONFIG_NO_EXCEPTIONS -# include -#endif - -#if nssv_CPP11_OR_GREATER -# include -#endif - -// Clang, GNUC, MSVC warning suppression macros: - -#if defined(__clang__) -# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wuser-defined-literals" -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wliteral-suffix" -#endif // __clang__ - -#if nssv_COMPILER_MSVC_VERSION >= 140 -# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] -# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) -# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) -#else -# define nssv_SUPPRESS_MSGSL_WARNING(expr) -# define nssv_SUPPRESS_MSVC_WARNING(code, descr) -# define nssv_DISABLE_MSVC_WARNINGS(codes) -#endif - -#if defined(__clang__) -# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") -#elif defined(__GNUC__) -# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") -#elif nssv_COMPILER_MSVC_VERSION >= 140 -# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) -#else -# define nssv_RESTORE_WARNINGS() -#endif - -// Suppress the following MSVC (GSL) warnings: -// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not -// start with an underscore are reserved -// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; -// use brace initialization, gsl::narrow_cast or gsl::narow -// - C26481: gsl::b.1 : don't use pointer arithmetic. 
Use span instead - -nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) -//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) -//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) - -namespace nonstd { namespace sv_lite { - -namespace detail { - -// support constexpr comparison in C++14; -// for C++17 and later, use provided traits: - -template< typename CharT > -inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) -{ - while ( count-- != 0 ) - { - if ( *s1 < *s2 ) return -1; - if ( *s1 > *s2 ) return +1; - ++s1; ++s2; - } - return 0; -} - -#if nssv_HAVE_BUILTIN_MEMCMP - -// specialization of compare() for char, see also generic compare() above: - -inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) -{ - return nssv_BUILTIN_MEMCMP( s1, s2, count ); -} - -#endif - -#if nssv_HAVE_BUILTIN_STRLEN - -// specialization of length() for char, see also generic length() further below: - -inline nssv_constexpr std::size_t length( char const * s ) -{ - return nssv_BUILTIN_STRLEN( s ); -} - -#endif - -#if defined(__OPTIMIZE__) - -// gcc, clang provide __OPTIMIZE__ -// Expect tail call optimization to make length() non-recursive: - -template< typename CharT > -inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) -{ - return *s == '\0' ? 
result : length( s + 1, result + 1 ); -} - -#else // OPTIMIZE - -// non-recursive: - -template< typename CharT > -inline nssv_constexpr14 std::size_t length( CharT * s ) -{ - std::size_t result = 0; - while ( *s++ != '\0' ) - { - ++result; - } - return result; -} - -#endif // OPTIMIZE - -} // namespace detail - -template -< - class CharT, - class Traits = std::char_traits -> -class basic_string_view; - -// -// basic_string_view: -// - -template -< - class CharT, - class Traits /* = std::char_traits */ -> -class basic_string_view -{ -public: - // Member types: - - typedef Traits traits_type; - typedef CharT value_type; - - typedef CharT * pointer; - typedef CharT const * const_pointer; - typedef CharT & reference; - typedef CharT const & const_reference; - - typedef const_pointer iterator; - typedef const_pointer const_iterator; - typedef std::reverse_iterator< const_iterator > reverse_iterator; - typedef std::reverse_iterator< const_iterator > const_reverse_iterator; - - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // 24.4.2.1 Construction and assignment: - - nssv_constexpr basic_string_view() nssv_noexcept - : data_( nssv_nullptr ) - , size_( 0 ) - {} - -#if nssv_CPP11_OR_GREATER - nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; -#else - nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept - : data_( other.data_) - , size_( other.size_) - {} -#endif - - nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept - : data_( s ) - , size_( count ) - {} - - nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept - : data_( s ) -#if nssv_CPP17_OR_GREATER - , size_( Traits::length(s) ) -#elif nssv_CPP11_OR_GREATER - , size_( detail::length(s) ) -#else - , size_( Traits::length(s) ) -#endif - {} - - // Assignment: - -#if nssv_CPP11_OR_GREATER - nssv_constexpr14 basic_string_view & 
operator=( basic_string_view const & other ) nssv_noexcept = default; -#else - nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept - { - data_ = other.data_; - size_ = other.size_; - return *this; - } -#endif - - // 24.4.2.2 Iterator support: - - nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } - nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } - - nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } - nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } - - nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } - nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } - - nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } - nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } - - // 24.4.2.3 Capacity: - - nssv_constexpr size_type size() const nssv_noexcept { return size_; } - nssv_constexpr size_type length() const nssv_noexcept { return size_; } - nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } - - // since C++20 - nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept - { - return 0 == size_; - } - - // 24.4.2.4 Element access: - - nssv_constexpr const_reference operator[]( size_type pos ) const - { - return data_at( pos ); - } - - nssv_constexpr14 const_reference at( size_type pos ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos < size() ); -#else - if ( pos >= size() ) - { - throw std::out_of_range("nonstd::string_view::at()"); - } -#endif - return data_at( pos ); - } - - nssv_constexpr const_reference front() const { return data_at( 0 ); } - nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } - - nssv_constexpr const_pointer 
data() const nssv_noexcept { return data_; } - - // 24.4.2.5 Modifiers: - - nssv_constexpr14 void remove_prefix( size_type n ) - { - assert( n <= size() ); - data_ += n; - size_ -= n; - } - - nssv_constexpr14 void remove_suffix( size_type n ) - { - assert( n <= size() ); - size_ -= n; - } - - nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept - { - const basic_string_view tmp(other); - other = *this; - *this = tmp; - } - - // 24.4.2.6 String operations: - - size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos <= size() ); -#else - if ( pos > size() ) - { - throw std::out_of_range("nonstd::string_view::copy()"); - } -#endif - const size_type rlen = (std::min)( n, size() - pos ); - - (void) Traits::copy( dest, data() + pos, rlen ); - - return rlen; - } - - nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const - { -#if nssv_CONFIG_NO_EXCEPTIONS - assert( pos <= size() ); -#else - if ( pos > size() ) - { - throw std::out_of_range("nonstd::string_view::substr()"); - } -#endif - return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); - } - - // compare(), 6x: - - nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) - { -#if nssv_CPP17_OR_GREATER - if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) -#else - if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) -#endif - { - return result; - } - - return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; - } - - nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) - { - return substr( pos1, n1 ).compare( other ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) - { - return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); - } - - nssv_constexpr int compare( CharT const * s ) const // (4) - { - return compare( basic_string_view( s ) ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) - { - return substr( pos1, n1 ).compare( basic_string_view( s ) ); - } - - nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) - { - return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); - } - - // 24.4.2.7 Searching: - - // starts_with(), 3x, since C++20: - - nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) - { - return size() >= v.size() && compare( 0, v.size(), v ) == 0; - } - - nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) - { - return starts_with( basic_string_view( &c, 1 ) ); - } - - nssv_constexpr bool starts_with( CharT const * s ) const // (3) - { - return starts_with( basic_string_view( s ) ); - } - - // ends_with(), 3x, since C++20: - - nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) - { - return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; - } - - nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) - { - return ends_with( basic_string_view( &c, 1 ) ); - } - - nssv_constexpr bool ends_with( CharT const * s ) const // (3) - { - return ends_with( basic_string_view( s ) ); - } - - // find(), 4x: - - nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return assert( v.size() == 0 || v.data() != nssv_nullptr ) - , pos >= size() - ? 
npos - : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) - { - return find( basic_string_view( s, n ), pos ); - } - - nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) - { - return find( basic_string_view( s ), pos ); - } - - // rfind(), 4x: - - nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - if ( size() < v.size() ) - { - return npos; - } - - if ( v.empty() ) - { - return (std::min)( size(), pos ); - } - - const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); - const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); - - return result != last ? size_type( result - cbegin() ) : npos; - } - - nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return rfind( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) - { - return rfind( basic_string_view( s, n ), pos ); - } - - nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) - { - return rfind( basic_string_view( s ), pos ); - } - - // find_first_of(), 4x: - - nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return pos >= size() - ? 
npos - : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find_first_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) - { - return find_first_of( basic_string_view( s, n ), pos ); - } - - nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) - { - return find_first_of( basic_string_view( s ), pos ); - } - - // find_last_of(), 4x: - - nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - return empty() - ? npos - : pos >= size() - ? find_last_of( v, size() - 1 ) - : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); - } - - nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return find_last_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_last_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) - { - return find_last_of( basic_string_view( s ), pos ); - } - - // find_first_not_of(), 4x: - - nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) - { - return pos >= size() - ? 
npos - : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); - } - - nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) - { - return find_first_not_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_first_not_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) - { - return find_first_not_of( basic_string_view( s ), pos ); - } - - // find_last_not_of(), 4x: - - nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) - { - return empty() - ? npos - : pos >= size() - ? find_last_not_of( v, size() - 1 ) - : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); - } - - nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) - { - return find_last_not_of( basic_string_view( &c, 1 ), pos ); - } - - nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) - { - return find_last_not_of( basic_string_view( s, count ), pos ); - } - - nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) - { - return find_last_not_of( basic_string_view( s ), pos ); - } - - // Constants: - -#if nssv_CPP17_OR_GREATER - static nssv_constexpr size_type npos = size_type(-1); -#elif nssv_CPP11_OR_GREATER - enum : size_type { npos = size_type(-1) }; -#else - enum { npos = size_type(-1) }; -#endif - -private: - struct not_in_view - { - const basic_string_view v; - - nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} - - nssv_constexpr bool operator()( CharT c ) const - { - return npos == v.find_first_of( c ); - } - }; - - nssv_constexpr size_type to_pos( const_iterator it 
) const - { - return it == cend() ? npos : size_type( it - cbegin() ); - } - - nssv_constexpr size_type to_pos( const_reverse_iterator it ) const - { - return it == crend() ? npos : size_type( crend() - it - 1 ); - } - - nssv_constexpr const_reference data_at( size_type pos ) const - { -#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) - return data_[pos]; -#else - return assert( pos < size() ), data_[pos]; -#endif - } - -private: - const_pointer data_; - size_type size_; - -public: -#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS - - template< class Allocator > - basic_string_view( std::basic_string const & s ) nssv_noexcept - : data_( s.data() ) - , size_( s.size() ) - {} - -#if nssv_HAVE_EXPLICIT_CONVERSION - - template< class Allocator > - explicit operator std::basic_string() const - { - return to_string( Allocator() ); - } - -#endif // nssv_HAVE_EXPLICIT_CONVERSION - -#if nssv_CPP11_OR_GREATER - - template< class Allocator = std::allocator > - std::basic_string - to_string( Allocator const & a = Allocator() ) const - { - return std::basic_string( begin(), end(), a ); - } - -#else - - std::basic_string - to_string() const - { - return std::basic_string( begin(), end() ); - } - - template< class Allocator > - std::basic_string - to_string( Allocator const & a ) const - { - return std::basic_string( begin(), end(), a ); - } - -#endif // nssv_CPP11_OR_GREATER - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS -}; - -// -// Non-member functions: -// - -// 24.4.3 Non-member comparison functions: -// lexicographically compare two string views (function template): - -template< class CharT, class Traits > -nssv_constexpr bool operator== ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits > -nssv_constexpr bool operator!= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -template< 
class CharT, class Traits > -nssv_constexpr bool operator< ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits > -nssv_constexpr bool operator<= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits > -nssv_constexpr bool operator> ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits > -nssv_constexpr bool operator>= ( - basic_string_view lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -// Let S be basic_string_view, and sv be an instance of S. -// Implementations shall provide sufficient additional overloads marked -// constexpr and noexcept so that an object t with an implicit conversion -// to S can be compared according to Table 67. - -#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) - -// accommodate for older compilers: - -// == - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator==( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -// != - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - 
basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -template< class CharT, class Traits> -nssv_constexpr bool operator!=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -// < - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) > 0; } - -// <= - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator<=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) >= 0; } - -// > - -template< 
class CharT, class Traits> -nssv_constexpr bool operator>( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) < 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) < 0; } - -// >= - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - basic_string_view lhs, - CharT const * rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - CharT const * lhs, - basic_string_view rhs ) nssv_noexcept -{ return rhs.compare( lhs ) <= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - basic_string_view lhs, - std::basic_string rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -template< class CharT, class Traits> -nssv_constexpr bool operator>=( - std::basic_string rhs, - basic_string_view lhs ) nssv_noexcept -{ return rhs.compare( lhs ) <= 0; } - -#else // newer compilers: - -#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type - -#if defined(_MSC_VER) // issue 40 -# define nssv_MSVC_ORDER(x) , int=x -#else -# define nssv_MSVC_ORDER(x) /*, int=x*/ -#endif - -// == - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator==( - basic_string_view lhs, - nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator==( - nssv_BASIC_STRING_VIEW_I(CharT, 
Traits) lhs, - basic_string_view rhs ) nssv_noexcept -{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } - -// != - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator!= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator!= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return !( lhs == rhs ); } - -// < - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator< ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator< ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) < 0; } - -// <= - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator<= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator<= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) <= 0; } - -// > - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator> ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) > 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator> ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ 
return lhs.compare( rhs ) > 0; } - -// >= - -template< class CharT, class Traits nssv_MSVC_ORDER(1) > -nssv_constexpr bool operator>= ( - basic_string_view < CharT, Traits > lhs, - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -template< class CharT, class Traits nssv_MSVC_ORDER(2) > -nssv_constexpr bool operator>= ( - nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, - basic_string_view < CharT, Traits > rhs ) nssv_noexcept -{ return lhs.compare( rhs ) >= 0; } - -#undef nssv_MSVC_ORDER -#undef nssv_BASIC_STRING_VIEW_I - -#endif // compiler-dependent approach to comparisons - -// 24.4.4 Inserters and extractors: - -#if ! nssv_CONFIG_NO_STREAM_INSERTION - -namespace detail { - -template< class Stream > -void write_padding( Stream & os, std::streamsize n ) -{ - for ( std::streamsize i = 0; i < n; ++i ) - os.rdbuf()->sputc( os.fill() ); -} - -template< class Stream, class View > -Stream & write_to_stream( Stream & os, View const & sv ) -{ - typename Stream::sentry sentry( os ); - - if ( !os ) - return os; - - const std::streamsize length = static_cast( sv.length() ); - - // Whether, and how, to pad: - const bool pad = ( length < os.width() ); - const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; - - if ( left_pad ) - write_padding( os, os.width() - length ); - - // Write span characters: - os.rdbuf()->sputn( sv.begin(), length ); - - if ( pad && !left_pad ) - write_padding( os, os.width() - length ); - - // Reset output stream width: - os.width( 0 ); - - return os; -} - -} // namespace detail - -template< class CharT, class Traits > -std::basic_ostream & -operator<<( - std::basic_ostream& os, - basic_string_view sv ) -{ - return detail::write_to_stream( os, sv ); -} - -#endif // nssv_CONFIG_NO_STREAM_INSERTION - -// Several typedefs for common character types are provided: - -typedef basic_string_view string_view; -typedef basic_string_view wstring_view; -#if 
nssv_HAVE_WCHAR16_T -typedef basic_string_view u16string_view; -typedef basic_string_view u32string_view; -#endif - -}} // namespace nonstd::sv_lite - -// -// 24.4.6 Suffix for basic_string_view literals: -// - -#if nssv_HAVE_USER_DEFINED_LITERALS - -namespace nonstd { -nssv_inline_ns namespace literals { -nssv_inline_ns namespace string_view_literals { - -#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS - -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) -{ - return nonstd::sv_lite::string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) -{ - return nonstd::sv_lite::u16string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) -{ - return nonstd::sv_lite::u32string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) -{ - return nonstd::sv_lite::wstring_view{ str, len }; -} - -#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS - -#if nssv_CONFIG_USR_SV_OPERATOR - -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) -{ - return nonstd::sv_lite::string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) -{ - return nonstd::sv_lite::u16string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) -{ - return nonstd::sv_lite::u32string_view{ str, len }; -} - -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) -{ - return nonstd::sv_lite::wstring_view{ str, len }; -} - -#endif // nssv_CONFIG_USR_SV_OPERATOR - 
-}}} // namespace nonstd::literals::string_view_literals - -#endif - -// -// Extensions for std::string: -// - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -namespace nonstd { -namespace sv_lite { - -// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): - -#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 - -template< class CharT, class Traits, class Allocator = std::allocator > -std::basic_string -to_string( basic_string_view v, Allocator const & a = Allocator() ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -#else - -template< class CharT, class Traits > -std::basic_string -to_string( basic_string_view v ) -{ - return std::basic_string( v.begin(), v.end() ); -} - -template< class CharT, class Traits, class Allocator > -std::basic_string -to_string( basic_string_view v, Allocator const & a ) -{ - return std::basic_string( v.begin(), v.end(), a ); -} - -#endif // nssv_CPP11_OR_GREATER - -template< class CharT, class Traits, class Allocator > -basic_string_view -to_string_view( std::basic_string const & s ) -{ - return basic_string_view( s.data(), s.size() ); -} - -}} // namespace nonstd::sv_lite - -#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS - -// -// make types and algorithms available in namespace nonstd: -// - -namespace nonstd { - -using sv_lite::basic_string_view; -using sv_lite::string_view; -using sv_lite::wstring_view; - -#if nssv_HAVE_WCHAR16_T -using sv_lite::u16string_view; -#endif -#if nssv_HAVE_WCHAR32_T -using sv_lite::u32string_view; -#endif - -// literal "sv" - -using sv_lite::operator==; -using sv_lite::operator!=; -using sv_lite::operator<; -using sv_lite::operator<=; -using sv_lite::operator>; -using sv_lite::operator>=; - -#if ! 
nssv_CONFIG_NO_STREAM_INSERTION -using sv_lite::operator<<; -#endif - -#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS -using sv_lite::to_string; -using sv_lite::to_string_view; -#endif - -} // namespace nonstd - -// 24.4.5 Hash support (C++11): - -// Note: The hash value of a string view object is equal to the hash value of -// the corresponding string object. - -#if nssv_HAVE_STD_HASH - -#include - -namespace std { - -template<> -struct hash< nonstd::string_view > -{ -public: - std::size_t operator()( nonstd::string_view v ) const nssv_noexcept - { - return std::hash()( std::string( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::wstring_view > -{ -public: - std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept - { - return std::hash()( std::wstring( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::u16string_view > -{ -public: - std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept - { - return std::hash()( std::u16string( v.data(), v.size() ) ); - } -}; - -template<> -struct hash< nonstd::u32string_view > -{ -public: - std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept - { - return std::hash()( std::u32string( v.data(), v.size() ) ); - } -}; - -} // namespace std - -#endif // nssv_HAVE_STD_HASH - -nssv_RESTORE_WARNINGS() - -#endif // nssv_HAVE_STD_STRING_VIEW -#endif // NONSTD_SV_LITE_H_INCLUDED -/* end file include/simdjson/nonstd/string_view.hpp */ -SIMDJSON_POP_DISABLE_WARNINGS - -namespace std { - using string_view = nonstd::string_view; -} -#endif // SIMDJSON_HAS_STRING_VIEW -#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. - -/// If EXPR is an error, returns it. 
-#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } - -#ifndef SIMDJSON_DEVELOPMENT_CHECKS -#ifndef NDEBUG -#define SIMDJSON_DEVELOPMENT_CHECKS -#endif -#endif - -// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" -// feature. - -#if SIMDJSON_CPLUSPLUS17 -// if we have C++, then fallthrough is a default attribute -# define simdjson_fallthrough [[fallthrough]] -// check if we have __attribute__ support -#elif defined(__has_attribute) -// check if we have the __fallthrough__ attribute -#if __has_attribute(__fallthrough__) -// we are good to go: -# define simdjson_fallthrough __attribute__((__fallthrough__)) -#endif // __has_attribute(__fallthrough__) -#endif // SIMDJSON_CPLUSPLUS17 -// on some systems, we simply do not have support for fallthrough, so use a default: -#ifndef simdjson_fallthrough -# define simdjson_fallthrough do {} while (0) /* fallthrough */ -#endif // simdjson_fallthrough - -#endif // SIMDJSON_COMMON_DEFS_H -/* end file include/simdjson/common_defs.h */ - -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_UNDESIRED_WARNINGS - -// Public API -/* begin file include/simdjson/error.h */ -#ifndef SIMDJSON_ERROR_H -#define SIMDJSON_ERROR_H - -#include - -namespace simdjson { - -/** - * All possible errors returned by simdjson. These error codes are subject to change - * and not all simdjson kernel returns the same error code given the same input: it is not - * well defined which error a given input should produce. - * - * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate - * to true as a Boolean. 
- */ -enum error_code { - SUCCESS = 0, ///< No error - CAPACITY, ///< This parser can't support a document that big - MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong while writing to the tape (stage 2), this is a generic error - DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation - STRING_ERROR, ///< Problem while parsing a string - T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' - F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' - N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' - NUMBER_ERROR, ///< Problem while parsing a number - UTF8_ERROR, ///< the input is not valid UTF-8 - UNINITIALIZED, ///< unknown error, or uninitialized document - EMPTY, ///< no structural element found - UNESCAPED_CHARS, ///< found unescaped characters in a string. - UNCLOSED_STRING, ///< missing quote at the end - UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture - INCORRECT_TYPE, ///< JSON element has a different type than user expected - NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits - INDEX_OUT_OF_BOUNDS, ///< JSON array index too large - NO_SUCH_FIELD, ///< JSON field not found in object - IO_ERROR, ///< Error reading a file - INVALID_JSON_POINTER, ///< Invalid JSON pointer reference - INVALID_URI_FRAGMENT, ///< Invalid URI fragment - UNEXPECTED_ERROR, ///< indicative of a bug in simdjson - PARSER_IN_USE, ///< parser is already in use. - OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order - INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. - SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. - OUT_OF_BOUNDS, ///< Attempted to access location outside of document. - NUM_ERROR_CODES -}; - -/** - * Get the error message for the given error code. 
- * - * dom::parser parser; - * dom::element doc; - * auto error = parser.parse("foo",3).get(doc); - * if (error) { printf("Error: %s\n", error_message(error)); } - * - * @return The error message. - */ -inline const char *error_message(error_code error) noexcept; - -/** - * Write the error message to the output stream - */ -inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; - -/** - * Exception thrown when an exception-supporting simdjson method is called - */ -struct simdjson_error : public std::exception { - /** - * Create an exception from a simdjson error code. - * @param error The error code - */ - simdjson_error(error_code error) noexcept : _error{error} { } - /** The error message */ - const char *what() const noexcept { return error_message(error()); } - /** The error code */ - error_code error() const noexcept { return _error; } -private: - /** The error code that was used */ - error_code _error; -}; - -namespace internal { - -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. - * - * Override like: - * - * struct simdjson_result : public internal::simdjson_result_base { - * simdjson_result() noexcept : internal::simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct simdjson_result_base : protected std::pair { - - /** - * Create a new empty result with error = UNINITIALIZED. 
- */ - simdjson_really_inline simdjson_result_base() noexcept; - - /** - * Create a new error result. - */ - simdjson_really_inline simdjson_result_base(error_code error) noexcept; - - /** - * Create a new successful result. - */ - simdjson_really_inline simdjson_result_base(T &&value) noexcept; - - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_really_inline simdjson_result_base(T &&value, error_code error) noexcept; - - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_really_inline error_code get(T &value) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_really_inline const T &value(error_code &error) const & noexcept; - - /** - * The error. - */ - simdjson_really_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T& value() & noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& value() && noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& take_value() && noexcept(false); - - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. 
- */ - simdjson_really_inline operator T&&() && noexcept(false); -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline const T& value_unsafe() const& noexcept; - - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline T&& value_unsafe() && noexcept; - -}; // struct simdjson_result_base - -} // namespace internal - -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - */ -template -struct simdjson_result : public internal::simdjson_result_base { - /** - * @private Create a new empty result with error = UNINITIALIZED. - */ - simdjson_really_inline simdjson_result() noexcept; - /** - * @private Create a new error result. - */ - simdjson_really_inline simdjson_result(T &&value) noexcept; - /** - * @private Create a new successful result. - */ - simdjson_really_inline simdjson_result(error_code error_code) noexcept; - /** - * @private Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_really_inline simdjson_result(T &&value, error_code error) noexcept; - - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_warn_unused simdjson_really_inline error_code get(T &value) && noexcept; - - /** - * The error. 
- */ - simdjson_really_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T& value() & noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& value() && noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& take_value() && noexcept(false); - - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline operator T&&() && noexcept(false); -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline const T& value_unsafe() const& noexcept; - - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline T&& value_unsafe() && noexcept; - -}; // struct simdjson_result - -#if SIMDJSON_EXCEPTIONS - -template -inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } -#endif // SIMDJSON_EXCEPTIONS - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -/** - * @deprecated This is an alias and will be removed, use error_code instead - */ -using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; - -/** - * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. 
- */ -[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] -inline const std::string error_message(int error) noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API -} // namespace simdjson - -#endif // SIMDJSON_ERROR_H -/* end file include/simdjson/error.h */ -/* begin file include/simdjson/minify.h */ -#ifndef SIMDJSON_MINIFY_H -#define SIMDJSON_MINIFY_H - -/* begin file include/simdjson/padded_string.h */ -#ifndef SIMDJSON_PADDED_STRING_H -#define SIMDJSON_PADDED_STRING_H - -#include -#include -#include -#include - -namespace simdjson { - -class padded_string_view; - -/** - * String with extra allocation for ease of use with parser::parse() - * - * This is a move-only class, it cannot be copied. - */ -struct padded_string final { - - /** - * Create a new, empty padded string. - */ - explicit inline padded_string() noexcept; - /** - * Create a new padded string buffer. - * - * @param length the size of the string. - */ - explicit inline padded_string(size_t length) noexcept; - /** - * Create a new padded string by copying the given input. - * - * @param data the buffer to copy - * @param length the number of bytes to copy - */ - explicit inline padded_string(const char *data, size_t length) noexcept; - /** - * Create a new padded string by copying the given input. - * - * @param str_ the string to copy - */ - inline padded_string(const std::string & str_ ) noexcept; - /** - * Create a new padded string by copying the given input. - * - * @param sv_ the string to copy - */ - inline padded_string(std::string_view sv_) noexcept; - /** - * Move one padded string into another. - * - * The original padded string will be reduced to zero capacity. - * - * @param o the string to move. - */ - inline padded_string(padded_string &&o) noexcept; - /** - * Move one padded string into another. - * - * The original padded string will be reduced to zero capacity. - * - * @param o the string to move. 
- */ - inline padded_string &operator=(padded_string &&o) noexcept; - inline void swap(padded_string &o) noexcept; - ~padded_string() noexcept; - - /** - * The length of the string. - * - * Does not include padding. - */ - size_t size() const noexcept; - - /** - * The length of the string. - * - * Does not include padding. - */ - size_t length() const noexcept; - - /** - * The string data. - **/ - const char *data() const noexcept; - const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} - - /** - * The string data. - **/ - char *data() noexcept; - - /** - * Create a std::string_view with the same content. - */ - operator std::string_view() const; - - /** - * Create a padded_string_view with the same content. - */ - operator padded_string_view() const noexcept; - - /** - * Load this padded string from a file. - * - * @return IO_ERROR on error. Be mindful that on some 32-bit systems, - * the file size might be limited to 2 GB. - * - * @param path the path to the file. - **/ - inline static simdjson_result load(std::string_view path) noexcept; - -private: - padded_string &operator=(const padded_string &o) = delete; - padded_string(const padded_string &o) = delete; - - size_t viable_size{0}; - char *data_ptr{nullptr}; - -}; // padded_string - -/** - * Send padded_string instance to an output stream. - * - * @param out The output stream. - * @param s The padded_string instance. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } - -#if SIMDJSON_EXCEPTIONS -/** - * Send padded_string instance to an output stream. - * - * @param out The output stream. - * @param s The padded_string instance. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). 
- */ -inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } -#endif - -} // namespace simdjson - -// This is deliberately outside of simdjson so that people get it without having to use the namespace -inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { - return simdjson::padded_string(str, len); -} - -namespace simdjson { -namespace internal { - -// The allocate_padded_buffer function is a low-level function to allocate memory -// with padding so we can read past the "length" bytes safely. It is used by -// the padded_string class automatically. It returns nullptr in case -// of error: the caller should check for a null pointer. -// The length parameter is the maximum size in bytes of the string. -// The caller is responsible to free the memory (e.g., delete[] (...)). -inline char *allocate_padded_buffer(size_t length) noexcept; - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_PADDED_STRING_H -/* end file include/simdjson/padded_string.h */ -#include -#include -#include - -namespace simdjson { - - - -/** - * - * Minify the input string assuming that it represents a JSON string, does not parse or validate. - * This function is much faster than parsing a JSON string and then writing a minified version of it. - * However, it does not validate the input. It will merely return an error in simple cases (e.g., if - * there is a string that was never terminated). - * - * - * @param buf the json document to minify. - * @param len the length of the json document. - * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. - * @param dst_len the number of bytes written. Output only. - * @return the error code, or SUCCESS if there was no error. 
- */ -simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; - -} // namespace simdjson - -#endif // SIMDJSON_MINIFY_H -/* end file include/simdjson/minify.h */ -/* begin file include/simdjson/padded_string_view.h */ -#ifndef SIMDJSON_PADDED_STRING_VIEW_H -#define SIMDJSON_PADDED_STRING_VIEW_H - - -#include -#include -#include -#include - -namespace simdjson { - -/** - * User-provided string that promises it has extra padded bytes at the end for use with parser::parse(). - */ -class padded_string_view : public std::string_view { -private: - size_t _capacity; - -public: - /** Create an empty padded_string_view. */ - inline padded_string_view() noexcept = default; - - /** - * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. - * - * @param s The string. - * @param len The length of the string (not including padding). - * @param capacity The allocated length of the string, including padding. - */ - explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; - /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ - explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; - - /** - * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. - * - * The capacity of the string will be used to determine its padding. - * - * @param s The string. - */ - explicit inline padded_string_view(const std::string &s) noexcept; - - /** - * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. - * - * @param s The string. - * @param capacity The allocated length of the string, including padding. - */ - explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; - - /** The number of allocated bytes. 
*/ - inline size_t capacity() const noexcept; - - /** The amount of padding on the string (capacity() - length()) */ - inline size_t padding() const noexcept; - -}; // padded_string_view - -#if SIMDJSON_EXCEPTIONS -/** - * Send padded_string instance to an output stream. - * - * @param out The output stream. - * @param s The padded_string_view. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). - */ -inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } -#endif - -} // namespace simdjson - -#endif // SIMDJSON_PADDED_STRING_VIEW_H -/* end file include/simdjson/padded_string_view.h */ -/* begin file include/simdjson/implementation.h */ -#ifndef SIMDJSON_IMPLEMENTATION_H -#define SIMDJSON_IMPLEMENTATION_H - -/* begin file include/simdjson/internal/dom_parser_implementation.h */ -#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H -#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H - -#include - -namespace simdjson { - -namespace dom { -class document; -} // namespace dom - -/** -* This enum is used with the dom_parser_implementation::stage1 function. -* 1) The regular mode expects a fully formed JSON document. -* 2) The streaming_partial mode expects a possibly truncated -* input within a stream on JSON documents. -* 3) The stream_final mode allows us to truncate final -* unterminated strings. It is useful in conjunction with streaming_partial. -*/ -enum class stage1_mode { regular, streaming_partial, streaming_final}; - -/** - * Returns true if mode == streaming_partial or mode == streaming_final - */ -inline bool is_streaming(stage1_mode mode) { - // performance note: it is probably faster to check that mode is different - // from regular than checking that it is either streaming_partial or streaming_final. 
- return (mode != stage1_mode::regular); - // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); -} - - -namespace internal { - - -/** - * An implementation of simdjson's DOM parser for a particular CPU architecture. - * - * This class is expected to be accessed only by pointer, and never move in memory (though the - * pointer can move). - */ -class dom_parser_implementation { -public: - - /** - * @private For internal implementation use - * - * Run a full JSON parse on a single document (stage1 + stage2). - * - * Guaranteed only to be called when capacity > document length. - * - * Overridden by each implementation. - * - * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param len The length of the json document. - * @return The error code, or SUCCESS if there was no error. - */ - simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; - - /** - * @private For internal implementation use - * - * Stage 1 of the document parser. - * - * Guaranteed only to be called when capacity > document length. - * - * Overridden by each implementation. - * - * @param buf The json document to parse. - * @param len The length of the json document. - * @param streaming Whether this is being called by parser::parse_many. - * @return The error code, or SUCCESS if there was no error. - */ - simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; - - /** - * @private For internal implementation use - * - * Stage 2 of the document parser. - * - * Called after stage1(). - * - * Overridden by each implementation. - * - * @param doc The document to output to. - * @return The error code, or SUCCESS if there was no error. 
- */ - simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; - - /** - * @private For internal implementation use - * - * Stage 2 of the document parser for parser::parse_many. - * - * Guaranteed only to be called after stage1(). - * Overridden by each implementation. - * - * @param doc The document to output to. - * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. - */ - simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; - - /** - * Change the capacity of this parser. - * - * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) - * and an CAPACITY error is returned if it is attempted. - * - * Generally used for reallocation. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. - * @return The error code, or SUCCESS if there was no error. - */ - virtual error_code set_capacity(size_t capacity) noexcept = 0; - - /** - * Change the max depth of this parser. - * - * Generally used for reallocation. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. - * @return The error code, or SUCCESS if there was no error. - */ - virtual error_code set_max_depth(size_t max_depth) noexcept = 0; - - /** - * Deallocate this parser. - */ - virtual ~dom_parser_implementation() = default; - - /** Number of structural indices passed from stage 1 to stage 2 */ - uint32_t n_structural_indexes{0}; - /** Structural indices passed from stage 1 to stage 2 */ - std::unique_ptr structural_indexes{}; - /** Next structural index to parse */ - uint32_t next_structural_index{0}; - - /** - * The largest document this parser can support without reallocating. - * - * @return Current capacity, in bytes. - */ - simdjson_really_inline size_t capacity() const noexcept; - - /** - * The maximum level of nested object and arrays supported by this parser. - * - * @return Maximum depth, in bytes. 
- */ - simdjson_really_inline size_t max_depth() const noexcept; - - /** - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return The error, if there is one. - */ - simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; - -protected: - /** - * The maximum document length this parser supports. - * - * Buffers are large enough to handle any document up to this length. - */ - size_t _capacity{0}; - - /** - * The maximum depth (number of nested objects and arrays) supported by this parser. - * - * Defaults to DEFAULT_MAX_DEPTH. - */ - size_t _max_depth{0}; - - // Declaring these so that subclasses can use them to implement their constructors. - simdjson_really_inline dom_parser_implementation() noexcept; - simdjson_really_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - simdjson_really_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - - simdjson_really_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; - simdjson_really_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; -}; // class dom_parser_implementation - -simdjson_really_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -simdjson_really_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -simdjson_really_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -simdjson_really_inline size_t dom_parser_implementation::capacity() const noexcept { - return _capacity; -} - -simdjson_really_inline size_t dom_parser_implementation::max_depth() const noexcept { - return 
_max_depth; -} - -simdjson_warn_unused -inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { - if (this->max_depth() != max_depth) { - error_code err = set_max_depth(max_depth); - if (err) { return err; } - } - if (_capacity != capacity) { - error_code err = set_capacity(capacity); - if (err) { return err; } - } - return SUCCESS; -} - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H -/* end file include/simdjson/internal/dom_parser_implementation.h */ -/* begin file include/simdjson/internal/isadetection.h */ -/* From -https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h -Highly modified. - -Copyright (c) 2016- Facebook, Inc (Adam Paszke) -Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -Copyright (c) 2011-2013 NYU (Clement Farabet) -Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, -Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute -(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, -Samy Bengio, Johnny Mariethoz) - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. 
Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories -America and IDIAP Research Institute nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef SIMDJSON_INTERNAL_ISADETECTION_H -#define SIMDJSON_INTERNAL_ISADETECTION_H - -#include -#include -#if defined(_MSC_VER) -#include -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) -#include -#endif - -namespace simdjson { -namespace internal { - - -enum instruction_set { - DEFAULT = 0x0, - NEON = 0x1, - AVX2 = 0x4, - SSE42 = 0x8, - PCLMULQDQ = 0x10, - BMI1 = 0x20, - BMI2 = 0x40, - ALTIVEC = 0x80, - AVX512F = 0x100, - AVX512DQ = 0x200, - AVX512IFMA = 0x400, - AVX512PF = 0x800, - AVX512ER = 0x1000, - AVX512CD = 0x2000, - AVX512BW = 0x4000, - AVX512VL = 0x8000, - AVX512VBMI2 = 0x10000 -}; - -#if defined(__PPC64__) - -static inline uint32_t detect_supported_architectures() { - return instruction_set::ALTIVEC; -} - -#elif defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 - -#if defined(__ARM_NEON) - -static inline uint32_t detect_supported_architectures() { - return instruction_set::NEON; -} - -#else // ARM without NEON - -static inline uint32_t detect_supported_architectures() { - return instruction_set::DEFAULT; -} - -#endif - -#elif defined(__x86_64__) || defined(_M_AMD64) // x64 - - -namespace { -// Can be found on Intel ISA Reference for CPUID -constexpr uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 -constexpr uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 -constexpr uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512ifma_bit = 1 << 21; ///< @private bit 21 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512vl_bit = 1 << 31; ///< @private bit 31 of EBX for EAX=0x7 -constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 -constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 -constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 -} - - - -static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, - uint32_t *edx) { -#if defined(_MSC_VER) - int cpu_info[4]; - __cpuid(cpu_info, *eax); - *eax = cpu_info[0]; - *ebx = cpu_info[1]; - *ecx = cpu_info[2]; - *edx = cpu_info[3]; -#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) - uint32_t level = *eax; - 
__get_cpuid(level, eax, ebx, ecx, edx); -#else - uint32_t a = *eax, b, c = *ecx, d; - asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); - *eax = a; - *ebx = b; - *ecx = c; - *edx = d; -#endif -} - -static inline uint32_t detect_supported_architectures() { - uint32_t eax, ebx, ecx, edx; - uint32_t host_isa = 0x0; - - // ECX for EAX=0x7 - eax = 0x7; - ecx = 0x0; - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx & cpuid_avx2_bit) { - host_isa |= instruction_set::AVX2; - } - if (ebx & cpuid_bmi1_bit) { - host_isa |= instruction_set::BMI1; - } - - if (ebx & cpuid_bmi2_bit) { - host_isa |= instruction_set::BMI2; - } - - if (ebx & cpuid_avx512f_bit) { - host_isa |= instruction_set::AVX512F; - } - - if (ebx & cpuid_avx512dq_bit) { - host_isa |= instruction_set::AVX512DQ; - } - - if (ebx & cpuid_avx512ifma_bit) { - host_isa |= instruction_set::AVX512IFMA; - } - - if (ebx & cpuid_avx512pf_bit) { - host_isa |= instruction_set::AVX512PF; - } - - if (ebx & cpuid_avx512er_bit) { - host_isa |= instruction_set::AVX512ER; - } - - if (ebx & cpuid_avx512cd_bit) { - host_isa |= instruction_set::AVX512CD; - } - - if (ebx & cpuid_avx512bw_bit) { - host_isa |= instruction_set::AVX512BW; - } - - if (ebx & cpuid_avx512vl_bit) { - host_isa |= instruction_set::AVX512VL; - } - - if (ecx & cpuid_avx512vbmi2_bit) { - host_isa |= instruction_set::AVX512VBMI2; - } - - // EBX for EAX=0x1 - eax = 0x1; - cpuid(&eax, &ebx, &ecx, &edx); - - if (ecx & cpuid_sse42_bit) { - host_isa |= instruction_set::SSE42; - } - - if (ecx & cpuid_pclmulqdq_bit) { - host_isa |= instruction_set::PCLMULQDQ; - } - - return host_isa; -} -#else // fallback - - -static inline uint32_t detect_supported_architectures() { - return instruction_set::DEFAULT; -} - - -#endif // end SIMD extension detection code - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_ISADETECTION_H -/* end file include/simdjson/internal/isadetection.h */ -#include -#include -#include - -namespace simdjson { - -/** - 
* Validate the UTF-8 string. - * - * @param buf the string to validate. - * @param len the length of the string in bytes. - * @return true if the string is valid UTF-8. - */ -simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; - - -/** - * Validate the UTF-8 string. - * - * @param sv the string_view to validate. - * @return true if the string is valid UTF-8. - */ -simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { - return validate_utf8(sv.data(), sv.size()); -} - -/** - * Validate the UTF-8 string. - * - * @param p the string to validate. - * @return true if the string is valid UTF-8. - */ -simdjson_really_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { - return validate_utf8(s.data(), s.size()); -} - -namespace dom { - class document; -} // namespace dom - -/** - * An implementation of simdjson for a particular CPU architecture. - * - * Also used to maintain the currently active implementation. The active implementation is - * automatically initialized on first use to the most advanced implementation supported by the host. - */ -class implementation { -public: - - /** - * The name of this implementation. - * - * const implementation *impl = simdjson::get_active_implementation(); - * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual const std::string &name() const { return _name; } - - /** - * The description of this implementation. - * - * const implementation *impl = simdjson::get_active_implementation(); - * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; - * - * @return the name of the implementation, e.g. 
"haswell", "westmere", "arm64" - */ - virtual const std::string &description() const { return _description; } - - /** - * The instruction sets this implementation is compiled against - * and the current CPU match. This function may poll the current CPU/system - * and should therefore not be called too often if performance is a concern. - * - * - * @return true if the implementation can be safely used on the current system (determined at runtime) - */ - bool supported_by_runtime_system() const; - - /** - * @private For internal implementation use - * - * The instruction sets this implementation is compiled against. - * - * @return a mask of all required `internal::instruction_set::` values - */ - virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; - - /** - * @private For internal implementation use - * - * const implementation *impl = simdjson::get_active_implementation(); - * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; - * - * @param capacity The largest document that will be passed to the parser. - * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. - * @param dst The place to put the resulting parser implementation. - * @return the name of the implementation, e.g. "haswell", "westmere", "arm64" - */ - virtual error_code create_dom_parser_implementation( - size_t capacity, - size_t max_depth, - std::unique_ptr &dst - ) const noexcept = 0; - - /** - * @private For internal implementation use - * - * Minify the input string assuming that it represents a JSON string, does not parse or validate. - * - * Overridden by each implementation. - * - * @param buf the json document to minify. - * @param len the length of the json document. - * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. - * @param dst_len the number of bytes written. Output only. 
- * @return the error code, or SUCCESS if there was no error. - */ - simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; - - - /** - * Validate the UTF-8 string. - * - * Overridden by each implementation. - * - * @param buf the string to validate. - * @param len the length of the string in bytes. - * @return true if and only if the string is valid UTF-8. - */ - simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; - -protected: - /** @private Construct an implementation with the given name and description. For subclasses. */ - simdjson_really_inline implementation( - std::string_view name, - std::string_view description, - uint32_t required_instruction_sets - ) : - _name(name), - _description(description), - _required_instruction_sets(required_instruction_sets) - { - } - virtual ~implementation()=default; - -private: - /** - * The name of this implementation. - */ - const std::string _name; - - /** - * The description of this implementation. - */ - const std::string _description; - - /** - * Instruction sets required for this implementation. - */ - const uint32_t _required_instruction_sets; -}; - -/** @private */ -namespace internal { - -/** - * The list of available implementations compiled into simdjson. - */ -class available_implementation_list { -public: - /** Get the list of available implementations compiled into simdjson */ - simdjson_really_inline available_implementation_list() {} - /** Number of implementations */ - size_t size() const noexcept; - /** STL const begin() iterator */ - const implementation * const *begin() const noexcept; - /** STL const end() iterator */ - const implementation * const *end() const noexcept; - - /** - * Get the implementation with the given name. - * - * Case sensitive. 
- * - * const implementation *impl = simdjson::get_available_implementations()["westmere"]; - * if (!impl) { exit(1); } - * if (!imp->supported_by_runtime_system()) { exit(1); } - * simdjson::get_active_implementation() = impl; - * - * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" - * @return the implementation, or nullptr if the parse failed. - */ - const implementation * operator[](const std::string_view &name) const noexcept { - for (const implementation * impl : *this) { - if (impl->name() == name) { return impl; } - } - return nullptr; - } - - /** - * Detect the most advanced implementation supported by the current host. - * - * This is used to initialize the implementation on startup. - * - * const implementation *impl = simdjson::available_implementation::detect_best_supported(); - * simdjson::get_active_implementation() = impl; - * - * @return the most advanced supported implementation for the current host, or an - * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported - * implementation. Will never return nullptr. - */ - const implementation *detect_best_supported() const noexcept; -}; - -template -class atomic_ptr { -public: - atomic_ptr(T *_ptr) : ptr{_ptr} {} - - operator const T*() const { return ptr.load(); } - const T& operator*() const { return *ptr; } - const T* operator->() const { return ptr.load(); } - - operator T*() { return ptr.load(); } - T& operator*() { return *ptr; } - T* operator->() { return ptr.load(); } - atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } - -private: - std::atomic ptr; -}; - -} // namespace internal - -/** - * The list of available implementations compiled into simdjson. - */ -extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); - -/** - * The active implementation. - * - * Automatically initialized on first use to the most advanced implementation supported by this hardware. 
- */ -extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); - -} // namespace simdjson - -#endif // SIMDJSON_IMPLEMENTATION_H -/* end file include/simdjson/implementation.h */ - -// Inline functions -/* begin file include/simdjson/error-inl.h */ -#ifndef SIMDJSON_INLINE_ERROR_H -#define SIMDJSON_INLINE_ERROR_H - -#include -#include -#include - -namespace simdjson { -namespace internal { - // We store the error code so we can validate the error message is associated with the right code - struct error_code_info { - error_code code; - const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) - }; - // These MUST match the codes in error_code. We check this constraint in basictests. - extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; -} // namespace internal - - -inline const char *error_message(error_code error) noexcept { - // If you're using error_code, we're trusting you got it from the enum. 
- return internal::error_codes[int(error)].message; -} - -// deprecated function -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -inline const std::string error_message(int error) noexcept { - if (error < 0 || error >= error_code::NUM_ERROR_CODES) { - return internal::error_codes[UNEXPECTED_ERROR].message; - } - return internal::error_codes[error].message; -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API - -inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { - return out << error_message(error); -} - -namespace internal { - -// -// internal::simdjson_result_base inline implementation -// - -template -simdjson_really_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} - -template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} - -template -simdjson_really_inline error_code simdjson_result_base::error() const noexcept { - return this->second; -} - -#if SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline T& simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} - -template -simdjson_really_inline T&& simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -template -simdjson_really_inline T&& simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} - -template -simdjson_really_inline simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline const T& simdjson_result_base::value_unsafe() const& noexcept { - return this->first; -} - -template -simdjson_really_inline T&& 
simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); -} - -template -simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept - : std::pair(std::forward(value), error) {} -template -simdjson_really_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept - : simdjson_result_base(T{}, error) {} -template -simdjson_really_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept - : simdjson_result_base(std::forward(value), SUCCESS) {} -template -simdjson_really_inline simdjson_result_base::simdjson_result_base() noexcept - : simdjson_result_base(T{}, UNINITIALIZED) {} - -} // namespace internal - -/// -/// simdjson_result inline implementation -/// - -template -simdjson_really_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { - std::forward>(*this).tie(value, error); -} - -template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) && noexcept { - return std::forward>(*this).get(value); -} - -template -simdjson_really_inline error_code simdjson_result::error() const noexcept { - return internal::simdjson_result_base::error(); -} - -#if SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline T& simdjson_result::value() & noexcept(false) { - return internal::simdjson_result_base::value(); -} - -template -simdjson_really_inline T&& simdjson_result::value() && noexcept(false) { - return std::forward>(*this).value(); -} - -template -simdjson_really_inline T&& simdjson_result::take_value() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -template -simdjson_really_inline simdjson_result::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline const T& simdjson_result::value_unsafe() const& noexcept { - return internal::simdjson_result_base::value_unsafe(); -} - -template 
-simdjson_really_inline T&& simdjson_result::value_unsafe() && noexcept { - return std::forward>(*this).value_unsafe(); -} - -template -simdjson_really_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept - : internal::simdjson_result_base(std::forward(value), error) {} -template -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base(error) {} -template -simdjson_really_inline simdjson_result::simdjson_result(T &&value) noexcept - : internal::simdjson_result_base(std::forward(value)) {} -template -simdjson_really_inline simdjson_result::simdjson_result() noexcept - : internal::simdjson_result_base() {} - -} // namespace simdjson - -#endif // SIMDJSON_INLINE_ERROR_H -/* end file include/simdjson/error-inl.h */ -/* begin file include/simdjson/padded_string-inl.h */ -#ifndef SIMDJSON_INLINE_PADDED_STRING_H -#define SIMDJSON_INLINE_PADDED_STRING_H - - -#include -#include -#include -#include - -namespace simdjson { -namespace internal { - -// The allocate_padded_buffer function is a low-level function to allocate memory -// with padding so we can read past the "length" bytes safely. It is used by -// the padded_string class automatically. It returns nullptr in case -// of error: the caller should check for a null pointer. -// The length parameter is the maximum size in bytes of the string. -// The caller is responsible to free the memory (e.g., delete[] (...)). -inline char *allocate_padded_buffer(size_t length) noexcept { - const size_t totalpaddedlength = length + SIMDJSON_PADDING; - if(totalpaddedlength(1UL<<20)) { - return nullptr; - } -#endif - - char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; - if (padded_buffer == nullptr) { - return nullptr; - } - // We write zeroes in the padded region to avoid having uninitized - // garbage. If nothing else, garbage getting read might trigger a - // warning in a memory checking. 
- std::memset(padded_buffer + length, 0, totalpaddedlength - length); - return padded_buffer; -} // allocate_padded_buffer() - -} // namespace internal - - -inline padded_string::padded_string() noexcept {} -inline padded_string::padded_string(size_t length) noexcept - : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { -} -inline padded_string::padded_string(const char *data, size_t length) noexcept - : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { - if ((data != nullptr) && (data_ptr != nullptr)) { - std::memcpy(data_ptr, data, length); - } -} -// note: do not pass std::string arguments by value -inline padded_string::padded_string(const std::string & str_ ) noexcept - : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { - if (data_ptr != nullptr) { - std::memcpy(data_ptr, str_.data(), str_.size()); - } -} -// note: do pass std::string_view arguments by value -inline padded_string::padded_string(std::string_view sv_) noexcept - : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { - if(simdjson_unlikely(!data_ptr)) { - //allocation failed or zero size - viable_size=0; - return; - } - if (sv_.size()) { - std::memcpy(data_ptr, sv_.data(), sv_.size()); - } -} -inline padded_string::padded_string(padded_string &&o) noexcept - : viable_size(o.viable_size), data_ptr(o.data_ptr) { - o.data_ptr = nullptr; // we take ownership -} - -inline padded_string &padded_string::operator=(padded_string &&o) noexcept { - delete[] data_ptr; - data_ptr = o.data_ptr; - viable_size = o.viable_size; - o.data_ptr = nullptr; // we take ownership - o.viable_size = 0; - return *this; -} - -inline void padded_string::swap(padded_string &o) noexcept { - size_t tmp_viable_size = viable_size; - char *tmp_data_ptr = data_ptr; - viable_size = o.viable_size; - data_ptr = o.data_ptr; - o.data_ptr = tmp_data_ptr; - o.viable_size = tmp_viable_size; -} - -inline 
padded_string::~padded_string() noexcept { - delete[] data_ptr; -} - -inline size_t padded_string::size() const noexcept { return viable_size; } - -inline size_t padded_string::length() const noexcept { return viable_size; } - -inline const char *padded_string::data() const noexcept { return data_ptr; } - -inline char *padded_string::data() noexcept { return data_ptr; } - -inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } - -inline padded_string::operator padded_string_view() const noexcept { - return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); -} - -inline simdjson_result padded_string::load(std::string_view filename) noexcept { - // Open the file - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(filename.data(), "rb"); - SIMDJSON_POP_DISABLE_WARNINGS - - if (fp == nullptr) { - return IO_ERROR; - } - - // Get the file size - if(std::fseek(fp, 0, SEEK_END) < 0) { - std::fclose(fp); - return IO_ERROR; - } -#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS - __int64 llen = _ftelli64(fp); - if(llen == -1L) { - std::fclose(fp); - return IO_ERROR; - } -#else - long llen = std::ftell(fp); - if((llen < 0) || (llen == LONG_MAX)) { - std::fclose(fp); - return IO_ERROR; - } -#endif - - // Allocate the padded_string - size_t len = static_cast(llen); - padded_string s(len); - if (s.data() == nullptr) { - std::fclose(fp); - return MEMALLOC; - } - - // Read the padded_string - std::rewind(fp); - size_t bytes_read = std::fread(s.data(), 1, len, fp); - if (std::fclose(fp) != 0 || bytes_read != len) { - return IO_ERROR; - } - - return s; -} - -} // namespace simdjson - -#endif // SIMDJSON_INLINE_PADDED_STRING_H -/* end file include/simdjson/padded_string-inl.h */ -/* begin file include/simdjson/padded_string_view-inl.h */ -#ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H -#define 
SIMDJSON_PADDED_STRING_VIEW_INL_H - - -#include -#include -#include -#include - -namespace simdjson { - -inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept - : std::string_view(s, len), _capacity(capacity) -{ -} - -inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept - : padded_string_view(reinterpret_cast(s), len, capacity) -{ -} - -inline padded_string_view::padded_string_view(const std::string &s) noexcept - : std::string_view(s), _capacity(s.capacity()) -{ -} - -inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept - : std::string_view(s), _capacity(capacity) -{ -} - -inline size_t padded_string_view::capacity() const noexcept { return _capacity; } - -inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } - -} // namespace simdjson - -#endif // SIMDJSON_PADDED_STRING_VIEW_INL_H -/* end file include/simdjson/padded_string_view-inl.h */ - -SIMDJSON_POP_DISABLE_WARNINGS - -#endif // SIMDJSON_BASE_H -/* end file include/simdjson/base.h */ - -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_UNDESIRED_WARNINGS - -/* begin file include/simdjson/dom/array.h */ -#ifndef SIMDJSON_DOM_ARRAY_H -#define SIMDJSON_DOM_ARRAY_H - -/* begin file include/simdjson/internal/tape_ref.h */ -#ifndef SIMDJSON_INTERNAL_TAPE_REF_H -#define SIMDJSON_INTERNAL_TAPE_REF_H - -/* begin file include/simdjson/internal/tape_type.h */ -#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H -#define SIMDJSON_INTERNAL_TAPE_TYPE_H - -namespace simdjson { -namespace internal { - -/** - * The possible types in the tape. 
- */ -enum class tape_type { - ROOT = 'r', - START_ARRAY = '[', - START_OBJECT = '{', - END_ARRAY = ']', - END_OBJECT = '}', - STRING = '"', - INT64 = 'l', - UINT64 = 'u', - DOUBLE = 'd', - TRUE_VALUE = 't', - FALSE_VALUE = 'f', - NULL_VALUE = 'n' -}; // enum class tape_type - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H -/* end file include/simdjson/internal/tape_type.h */ - -namespace simdjson { - -namespace dom { - class document; -} - -namespace internal { - -constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; -constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; - -/** - * A reference to an element on the tape. Internal only. - */ -class tape_ref { -public: - simdjson_really_inline tape_ref() noexcept; - simdjson_really_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; - inline size_t after_element() const noexcept; - simdjson_really_inline tape_type tape_ref_type() const noexcept; - simdjson_really_inline uint64_t tape_value() const noexcept; - simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - simdjson_really_inline bool is_false() const noexcept; - simdjson_really_inline bool is_true() const noexcept; - simdjson_really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. - simdjson_really_inline uint32_t matching_brace_index() const noexcept; - simdjson_really_inline uint32_t scope_count() const noexcept; - template - simdjson_really_inline T next_tape_value() const noexcept; - simdjson_really_inline uint32_t get_string_length() const noexcept; - simdjson_really_inline const char * get_c_str() const noexcept; - inline std::string_view get_string_view() const noexcept; - simdjson_really_inline bool is_document_root() const noexcept; - - /** The document this element references. 
*/ - const dom::document *doc; - - /** The index of this element on `doc.tape[]` */ - size_t json_index; -}; - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_TAPE_REF_H -/* end file include/simdjson/internal/tape_ref.h */ - -namespace simdjson { - -namespace internal { -template -class string_builder; -} -namespace dom { - -class document; -class element; - -/** - * JSON array. - */ -class array { -public: - /** Create a new, invalid array */ - simdjson_really_inline array() noexcept; - - class iterator { - public: - using value_type = element; - using difference_type = std::ptrdiff_t; - - /** - * Get the actual value - */ - inline value_type operator*() const noexcept; - /** - * Get the next value. - * - * Part of the std::iterator interface. - */ - inline iterator& operator++() noexcept; - /** - * Get the next value. - * - * Part of the std::iterator interface. - */ - inline iterator operator++(int) noexcept; - /** - * Check if these values come from the same place in the JSON. - * - * Part of the std::iterator interface. - */ - inline bool operator!=(const iterator& other) const noexcept; - inline bool operator==(const iterator& other) const noexcept; - - inline bool operator<(const iterator& other) const noexcept; - inline bool operator<=(const iterator& other) const noexcept; - inline bool operator>=(const iterator& other) const noexcept; - inline bool operator>(const iterator& other) const noexcept; - - iterator() noexcept = default; - iterator(const iterator&) noexcept = default; - iterator& operator=(const iterator&) noexcept = default; - private: - simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; - friend class array; - }; - - /** - * Return the first array element. - * - * Part of the std::iterable interface. - */ - inline iterator begin() const noexcept; - /** - * One past the last array element. - * - * Part of the std::iterable interface. 
- */ - inline iterator end() const noexcept; - /** - * Get the size of the array (number of immediate children). - * It is a saturated value with a maximum of 0xFFFFFF: if the value - * is 0xFFFFFF then the size is 0xFFFFFF or greater. - */ - inline size_t size() const noexcept; - /** - * Get the total number of slots used by this array on the tape. - * - * Note that this is not the same thing as `size()`, which reports the - * number of actual elements within an array (not counting its children). - * - * Since an element can use 1 or 2 slots on the tape, you can only use this - * to figure out the total size of an array (including its children, - * recursively) if you know its structure ahead of time. - **/ - inline size_t number_of_slots() const noexcept; - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node - * as the root of its own JSON document. - * - * dom::parser parser; - * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); - * a.at_pointer("/0/foo/a/1") == 20 - * a.at_pointer("0")["foo"]["a"].at(1) == 20 - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; - - /** - * Get the value at the given index. This function has linear-time complexity and - * is equivalent to the following: - * - * size_t i=0; - * for (auto element : *this) { - * if (i == index) { return element; } - * i++; - * } - * return INDEX_OUT_OF_BOUNDS; - * - * Avoid calling the at() function repeatedly. 
- * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - inline simdjson_result at(size_t index) const noexcept; - -private: - simdjson_really_inline array(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; - friend class element; - friend struct simdjson_result; - template - friend class simdjson::internal::string_builder; -}; - - -} // namespace dom - -/** The result of a JSON conversion that may fail. */ -template<> -struct simdjson_result : public internal::simdjson_result_base { -public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::array value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; - inline simdjson_result at(size_t index) const noexcept; - -#if SIMDJSON_EXCEPTIONS - inline dom::array::iterator begin() const noexcept(false); - inline dom::array::iterator end() const noexcept(false); - inline size_t size() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS -}; - - - -} // namespace simdjson - -#if defined(__cpp_lib_ranges) -#include - -namespace std { -namespace ranges { -template<> -inline constexpr bool enable_view = true; -#if SIMDJSON_EXCEPTIONS -template<> -inline constexpr bool enable_view> = true; -#endif // SIMDJSON_EXCEPTIONS -} // namespace ranges -} // namespace std -#endif // defined(__cpp_lib_ranges) - -#endif // SIMDJSON_DOM_ARRAY_H -/* end file include/simdjson/dom/array.h */ -/* begin file include/simdjson/dom/document_stream.h */ -#ifndef SIMDJSON_DOCUMENT_STREAM_H -#define SIMDJSON_DOCUMENT_STREAM_H - -/* begin file include/simdjson/dom/parser.h */ -#ifndef SIMDJSON_DOM_PARSER_H -#define SIMDJSON_DOM_PARSER_H - -/* begin file include/simdjson/dom/document.h */ -#ifndef SIMDJSON_DOM_DOCUMENT_H -#define SIMDJSON_DOM_DOCUMENT_H - 
-#include -#include - -namespace simdjson { -namespace dom { - -class element; - -/** - * A parsed JSON document. - * - * This class cannot be copied, only moved, to avoid unintended allocations. - */ -class document { -public: - /** - * Create a document container with zero capacity. - * - * The parser will allocate capacity as needed. - */ - document() noexcept = default; - ~document() noexcept = default; - - /** - * Take another document's buffers. - * - * @param other The document to take. Its capacity is zeroed and it is invalidated. - */ - document(document &&other) noexcept = default; - /** @private */ - document(const document &) = delete; // Disallow copying - /** - * Take another document's buffers. - * - * @param other The document to take. Its capacity is zeroed. - */ - document &operator=(document &&other) noexcept = default; - /** @private */ - document &operator=(const document &) = delete; // Disallow copying - - /** - * Get the root element of this document as a JSON array. - */ - element root() const noexcept; - - /** - * @private Dump the raw tape for debugging. - * - * @param os the stream to output to. - * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). - */ - bool dump_raw_tape(std::ostream &os) const noexcept; - - /** @private Structural values. */ - std::unique_ptr tape{}; - - /** @private String values. - * - * Should be at least byte_capacity. - */ - std::unique_ptr string_buf{}; - /** @private Allocate memory to support - * input JSON documents of up to len bytes. - * - * When calling this function, you lose - * all the data. - * - * The memory allocation is strict: you - * can you use this function to increase - * or lower the amount of allocated memory. - * Passsing zero clears the memory. - */ - error_code allocate(size_t len) noexcept; - /** @private Capacity in bytes, in terms - * of how many bytes of input JSON we can - * support. 
- */ - size_t capacity() const noexcept; - - -private: - size_t allocated_capacity{0}; - friend class parser; -}; // class document - -} // namespace dom -} // namespace simdjson - -#endif // SIMDJSON_DOM_DOCUMENT_H -/* end file include/simdjson/dom/document.h */ -#include -#include -#include - -namespace simdjson { - -namespace dom { - -class document_stream; -class element; - -/** The default batch size for parser.parse_many() and parser.load_many() */ -static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; -/** - * Some adversary might try to set the batch size to 0 or 1, which might cause problems. - * We set a minimum of 32B since anything else is highly likely to be an error. In practice, - * most users will want a much larger batch size. - * - * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON - * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. - */ -static constexpr size_t MINIMAL_BATCH_SIZE = 32; - -/** - * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). - */ -static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; - -/** - * A persistent document parser. - * - * The parser is designed to be reused, holding the internal buffers necessary to do parsing, - * as well as memory for a single document. The parsed document is overwritten on each parse. - * - * This class cannot be copied, only moved, to avoid unintended allocations. - * - * @note Moving a parser instance may invalidate "dom::element" instances. If you need to - * preserve both the "dom::element" instances and the parser, consider wrapping the parser - * instance in a std::unique_ptr instance: - * - * std::unique_ptr parser(new dom::parser{}); - * auto error = parser->load(f).get(root); - * - * You can then move std::unique_ptr safely. - * - * @note This is not thread safe: one parser cannot produce two documents at the same time! 
- */ -class parser { -public: - /** - * Create a JSON parser. - * - * The new parser will have zero capacity. - * - * @param max_capacity The maximum document length the parser can automatically handle. The parser - * will allocate more capacity on an as needed basis (when it sees documents too big to handle) - * up to this amount. The parser still starts with zero capacity no matter what this number is: - * to allocate an initial capacity, call allocate() after constructing the parser. - * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). - */ - simdjson_really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; - /** - * Take another parser's buffers and state. - * - * @param other The parser to take. Its capacity is zeroed. - */ - simdjson_really_inline parser(parser &&other) noexcept; - parser(const parser &) = delete; ///< @private Disallow copying - /** - * Take another parser's buffers and state. - * - * @param other The parser to take. Its capacity is zeroed. - */ - simdjson_really_inline parser &operator=(parser &&other) noexcept; - parser &operator=(const parser &) = delete; ///< @private Disallow copying - - /** Deallocate the JSON parser. */ - ~parser()=default; - - /** - * Load a JSON document from a file and return a reference to it. - * - * dom::parser parser; - * const element doc = parser.load("jsonexamples/twitter.json"); - * - * The function is eager: the file's content is loaded in memory inside the parser instance - * and immediately parsed. The file can be deleted after the `parser.load` call. - * - * ### IMPORTANT: Document Lifetime - * - * The JSON document still lives in the parser: this is the most efficient way to parse JSON - * documents because it reuses the same buffers, but you *must* use the document before you - * destroy the parser or call parse() again. - * - * Moving the parser instance is safe, but it invalidates the element instances. 
You may store - * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like - * so: `std::unique_ptr parser(new dom::parser{});`. - * - * ### Parser Capacity - * - * If the parser's current capacity is less than the file length, it will allocate enough capacity - * to handle it (up to max_capacity). - * - * @param path The path to load. - * @return The document, or an error: - * - IO_ERROR if there was an error opening or reading the file. - * Be mindful that on some 32-bit systems, - * the file size might be limited to 2 GB. - * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. - * - CAPACITY if the parser does not have enough capacity and len > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). - */ - inline simdjson_result load(const std::string &path) & noexcept; - inline simdjson_result load(const std::string &path) && = delete ; - /** - * Parse a JSON document and return a temporary reference to it. - * - * dom::parser parser; - * element doc_root = parser.parse(buf, len); - * - * The function eagerly parses the input: the input can be modified and discarded after - * the `parser.parse(buf, len)` call has completed. - * - * ### IMPORTANT: Document Lifetime - * - * The JSON document still lives in the parser: this is the most efficient way to parse JSON - * documents because it reuses the same buffers, but you *must* use the document before you - * destroy the parser or call parse() again. - * - * Moving the parser instance is safe, but it invalidates the element instances. You may store - * the parser instance without moving it by wrapping it inside an `unique_ptr` instance like - * so: `std::unique_ptr parser(new dom::parser{});`. 
- * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, - * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: - * - * const char *json = R"({"key":"value"})"; - * const size_t json_len = std::strlen(json); - * simdjson::dom::parser parser; - * simdjson::dom::element element = parser.parse(json, json_len); - * - * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), - * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. - * The benefit of setting realloc_if_needed to false is that you avoid a temporary - * memory allocation and a copy. - * - * The padded bytes may be read. It is not important how you initialize - * these bytes though we recommend a sensible default like null character values or spaces. - * For example, the following low-level code is safe: - * - * const char *json = R"({"key":"value"})"; - * const size_t json_len = std::strlen(json); - * std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; - * std::memcpy(padded_json_copy.get(), json, json_len); - * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); - * simdjson::dom::parser parser; - * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); - * - * ### Parser Capacity - * - * If the parser's current capacity is less than len, it will allocate enough capacity - * to handle it (up to max_capacity). - * - * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless - * realloc_if_needed is true. - * @param len The length of the JSON. - * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. 
- * @return An element pointing at the root of the document, or an error: - * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, - * and memory allocation fails. - * - CAPACITY if the parser does not have enough capacity and len > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). - */ - inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; - inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; - /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; - /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const std::string &s) & noexcept; - simdjson_really_inline simdjson_result parse(const std::string &s) && =delete; - /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse(const padded_string &s) & noexcept; - simdjson_really_inline simdjson_result parse(const padded_string &s) && =delete; - - /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_really_inline simdjson_result parse(const char *buf) noexcept = delete; - - /** - * Parse a JSON document into a provide document instance and return a temporary reference to it. 
- * It is similar to the function `parse` except that instead of parsing into the internal - * `document` instance associated with the parser, it allows the user to provide a document - * instance. - * - * dom::parser parser; - * dom::document doc; - * element doc_root = parser.parse_into_document(doc, buf, len); - * - * The function eagerly parses the input: the input can be modified and discarded after - * the `parser.parse(buf, len)` call has completed. - * - * ### IMPORTANT: Document Lifetime - * - * After the call to parse_into_document, the parser is no longer needed. - * - * The JSON document lives in the document instance: you must keep the document - * instance alive while you navigate through it (i.e., used the returned value from - * parse_into_document). You are encourage to reuse the document instance - * many times with new data to avoid reallocations: - * - * dom::document doc; - * element doc_root1 = parser.parse_into_document(doc, buf1, len); - * //... doc_root1 is a pointer inside doc - * element doc_root2 = parser.parse_into_document(doc, buf1, len); - * //... doc_root2 is a pointer inside doc - * // at this point doc_root1 is no longer safe - * - * Moving the document instance is safe, but it invalidates the element instances. After - * moving a document, you can recover safe access to the document root with its `root()` method. - * - * @param doc The document instance where the parsed data will be stored (on success). - * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless - * realloc_if_needed is true. - * @param len The length of the JSON. - * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. - * @return An element pointing at the root of document, or an error: - * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, - * and memory allocation fails. 
- * - CAPACITY if the parser does not have enough capacity and len > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). - */ - inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; - inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; - /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; - /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; - /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; - simdjson_really_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; - - /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_really_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; - - /** - * Load a file containing many JSON documents. 
- * - * dom::parser parser; - * for (const element doc : parser.load_many(path)) { - * cout << std::string(doc["title"]) << endl; - * } - * - * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` - * function has returned. The memory is held by the `parser` instance. - * - * The function is lazy: it may be that no more than one JSON document at a time is parsed. - * And, possibly, no document many have been parsed when the `parser.load_many(path)` function - * returned. - * - * ### Format - * - * The file must contain a series of one or more JSON documents, concatenated into a single - * buffer, separated by whitespace. It effectively parses until it has a fully valid document, - * then starts parsing the next document at that point. (It does this with more parallelism and - * lookahead than you might think, though.) - * - * Documents that consist of an object or array may omit the whitespace between them, concatenating - * with no separator. documents that consist of a single primitive (i.e. documents that are not - * arrays or objects) MUST be separated with whitespace. - * - * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the - * performance. - * - * ### Error Handling - * - * All errors are returned during iteration: if there is a global error such as memory allocation, - * it will be yielded as the first result. Iteration always stops after the first error. 
- * - * As with all other simdjson methods, non-exception error handling is readily available through - * the same interface, requiring you to check the error before using the document: - * - * dom::parser parser; - * dom::document_stream docs; - * auto error = parser.load_many(path).get(docs); - * if (error) { cerr << error << endl; exit(1); } - * for (auto doc : docs) { - * std::string_view title; - * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } - * cout << title << endl; - * } - * - * ### Threads - * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. - * - * ### Parser Capacity - * - * If the parser's current capacity is less than batch_size, it will allocate enough capacity - * to handle it (up to max_capacity). - * - * @param path File name pointing at the concatenated JSON to parse. - * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet - * spot is cache-related: small enough to fit in cache, yet big enough to - * parse as many documents as possible in one tight loop. - * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet - * spot in our tests. - * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE - * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. - * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: - * - IO_ERROR if there was an error opening or reading the file. - * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. - * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). 
- */ - inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; - - /** - * Parse a buffer containing many JSON documents. - * - * dom::parser parser; - * for (element doc : parser.parse_many(buf, len)) { - * cout << std::string(doc["title"]) << endl; - * } - * - * No copy of the input buffer is made. - * - * The function is lazy: it may be that no more than one JSON document at a time is parsed. - * And, possibly, no document many have been parsed when the `parser.load_many(path)` function - * returned. - * - * The caller is responsabile to ensure that the input string data remains unchanged and is - * not deleted during the loop. In particular, the following is unsafe and will not compile: - * - * auto docs = parser.parse_many("[\"temporary data\"]"_padded); - * // here the string "[\"temporary data\"]" may no longer exist in memory - * // the parser instance may not have even accessed the input yet - * for (element doc : docs) { - * cout << std::string(doc["title"]) << endl; - * } - * - * The following is safe: - * - * auto json = "[\"temporary data\"]"_padded; - * auto docs = parser.parse_many(json); - * for (element doc : docs) { - * cout << std::string(doc["title"]) << endl; - * } - * - * ### Format - * - * The buffer must contain a series of one or more JSON documents, concatenated into a single - * buffer, separated by whitespace. It effectively parses until it has a fully valid document, - * then starts parsing the next document at that point. (It does this with more parallelism and - * lookahead than you might think, though.) - * - * documents that consist of an object or array may omit the whitespace between them, concatenating - * with no separator. documents that consist of a single primitive (i.e. documents that are not - * arrays or objects) MUST be separated with whitespace. - * - * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. 
- * Setting batch_size to excessively large or excesively small values may impact negatively the - * performance. - * - * ### Error Handling - * - * All errors are returned during iteration: if there is a global error such as memory allocation, - * it will be yielded as the first result. Iteration always stops after the first error. - * - * As with all other simdjson methods, non-exception error handling is readily available through - * the same interface, requiring you to check the error before using the document: - * - * dom::parser parser; - * dom::document_stream docs; - * auto error = parser.load_many(path).get(docs); - * if (error) { cerr << error << endl; exit(1); } - * for (auto doc : docs) { - * std::string_view title; - * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } - * cout << title << endl; - * } - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * ### Threads - * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. - * - * ### Parser Capacity - * - * If the parser's current capacity is less than batch_size, it will allocate enough capacity - * to handle it (up to max_capacity). - * - * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. - * @param len The length of the concatenated JSON. - * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet - * spot is cache-related: small enough to fit in cache, yet big enough to - * parse as many documents as possible in one tight loop. - * Defaults to 10MB, which has been a reasonable sweet spot in our tests. - * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. 
Errors: - * - MEMALLOC if the parser does not have enough capacity and memory allocation fails - * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). - */ - inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; - inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; - inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe - - /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; - - /** - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return The error, if there is one. 
- */ - simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - /** - * @private deprecated because it returns bool instead of error_code, which is our standard for - * failures. Use allocate() instead. - * - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return true if successful, false if allocation failed. - */ - [[deprecated("Use allocate() instead.")]] - simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API - /** - * The largest document this parser can support without reallocating. - * - * @return Current capacity, in bytes. - */ - simdjson_really_inline size_t capacity() const noexcept; - - /** - * The largest document this parser can automatically support. - * - * The parser may reallocate internal buffers as needed up to this amount. - * - * @return Maximum capacity, in bytes. - */ - simdjson_really_inline size_t max_capacity() const noexcept; - - /** - * The maximum level of nested object and arrays supported by this parser. - * - * @return Maximum depth, in bytes. - */ - simdjson_really_inline size_t max_depth() const noexcept; - - /** - * Set max_capacity. This is the largest document this parser can automatically support. - * - * The parser may reallocate internal buffers as needed up to this amount as documents are passed - * to it. - * - * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, - * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, - * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. 
- * - * This call will not allocate or deallocate, even if capacity is currently above max_capacity. - * - * @param max_capacity The new maximum capacity, in bytes. - */ - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; - -#ifdef SIMDJSON_THREADS_ENABLED - /** - * The parser instance can use threads when they are available to speed up some - * operations. It is enabled by default. Changing this attribute will change the - * behavior of the parser for future operations. - */ - bool threaded{true}; -#endif - /** @private Use the new DOM API instead */ - class Iterator; - /** @private Use simdjson_error instead */ - using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; - - /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; - - /** @private Use `if (parser.parse(...).error())` instead */ - bool valid{false}; - /** @private Use `parser.parse(...).error()` instead */ - error_code error{UNINITIALIZED}; - - /** @private Use `parser.parse(...).value()` instead */ - document doc{}; - - /** @private returns true if the document parsed was valid */ - [[deprecated("Use the result of parser.parse() instead")]] - inline bool is_valid() const noexcept; - - /** - * @private return an error code corresponding to the last parsing attempt, see - * simdjson.h will return UNINITIALIZED if no parsing was attempted - */ - [[deprecated("Use the result of parser.parse() instead")]] - inline int get_error_code() const noexcept; - - /** @private return the string equivalent of "get_error_code" */ - [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] - inline std::string get_error_message() const noexcept; - - /** @private */ - [[deprecated("Use cout << on the result of parser.parse() instead")]] - inline bool print_json(std::ostream &os) const noexcept; - - /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` 
instead */ - inline bool dump_raw_tape(std::ostream &os) const noexcept; - - -private: - /** - * The maximum document length this parser will automatically support. - * - * The parser will not be automatically allocated above this amount. - */ - size_t _max_capacity; - - /** - * The loaded buffer (reused each time load() is called) - */ - std::unique_ptr loaded_bytes; - - /** Capacity of loaded_bytes buffer. */ - size_t _loaded_bytes_capacity{0}; - - // all nodes are stored on the doc.tape using a 64-bit word. - // - // strings, double and ints are stored as - // a 64-bit word with a pointer to the actual value - // - // - // - // for objects or arrays, store [ or { at the beginning and } and ] at the - // end. For the openings ([ or {), we annotate them with a reference to the - // location on the doc.tape of the end, and for then closings (} and ]), we - // annotate them with a reference to the location of the opening - // - // - - /** - * Ensure we have enough capacity to handle at least desired_capacity bytes, - * and auto-allocate if not. This also allocates memory if needed in the - * internal document. - */ - inline error_code ensure_capacity(size_t desired_capacity) noexcept; - /** - * Ensure we have enough capacity to handle at least desired_capacity bytes, - * and auto-allocate if not. This also allocates memory if needed in the - * provided document. 
- */ - inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; - - /** Read the file into loaded_bytes */ - inline simdjson_result read_file(const std::string &path) noexcept; - - friend class parser::Iterator; - friend class document_stream; - - -}; // class parser - -} // namespace dom -} // namespace simdjson - -#endif // SIMDJSON_DOM_PARSER_H -/* end file include/simdjson/dom/parser.h */ -#ifdef SIMDJSON_THREADS_ENABLED -#include -#include -#include -#endif - -namespace simdjson { -namespace dom { - - -#ifdef SIMDJSON_THREADS_ENABLED -/** @private Custom worker class **/ -struct stage1_worker { - stage1_worker() noexcept = default; - stage1_worker(const stage1_worker&) = delete; - stage1_worker(stage1_worker&&) = delete; - stage1_worker operator=(const stage1_worker&) = delete; - ~stage1_worker(); - /** - * We only start the thread when it is needed, not at object construction, this may throw. - * You should only call this once. - **/ - void start_thread(); - /** - * Start a stage 1 job. You should first call 'run', then 'finish'. - * You must call start_thread once before. - */ - void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); - /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ - void finish(); - -private: - - /** - * Normally, we would never stop the thread. But we do in the destructor. - * This function is only safe assuming that you are not waiting for results. You - * should have called run, then finish, and be done. - **/ - void stop_thread(); - - std::thread thread{}; - /** These three variables define the work done by the thread. **/ - dom::parser * stage1_thread_parser{}; - size_t _next_batch_start{}; - document_stream * owner{}; - /** - * We have two state variables. This could be streamlined to one variable in the future but - * we use two for clarity. - */ - bool has_work{false}; - bool can_work{true}; - - /** - * We lock using a mutex. 
- */ - std::mutex locking_mutex{}; - std::condition_variable cond_var{}; -}; -#endif - -/** - * A forward-only stream of documents. - * - * Produced by parser::parse_many. - * - */ -class document_stream { -public: - /** - * Construct an uninitialized document_stream. - * - * ```c++ - * document_stream docs; - * error = parser.parse_many(json).get(docs); - * ``` - */ - simdjson_really_inline document_stream() noexcept; - /** Move one document_stream to another. */ - simdjson_really_inline document_stream(document_stream &&other) noexcept = default; - /** Move one document_stream to another. */ - simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; - - simdjson_really_inline ~document_stream() noexcept; - /** - * Returns the input size in bytes. - */ - inline size_t size_in_bytes() const noexcept; - /** - * After iterating through the stream, this method - * returns the number of bytes that were not parsed at the end - * of the stream. If truncated_bytes() differs from zero, - * then the input was truncated maybe because incomplete JSON - * documents were found at the end of the stream. You - * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). - * - * You should only call truncated_bytes() after streaming through all - * documents, like so: - * - * document_stream stream = parser.parse_many(json,window); - * for(auto doc : stream) { - * // do something with doc - * } - * size_t truncated = stream.truncated_bytes(); - * - */ - inline size_t truncated_bytes() const noexcept; - /** - * An iterator through a forward-only stream of documents. - */ - class iterator { - public: - using value_type = simdjson_result; - using reference = value_type; - - using difference_type = std::ptrdiff_t; - - using iterator_category = std::input_iterator_tag; - - /** - * Default constructor. - */ - simdjson_really_inline iterator() noexcept; - /** - * Get the current document (or error). 
- */ - simdjson_really_inline reference operator*() noexcept; - /** - * Advance to the next document (prefix). - */ - inline iterator& operator++() noexcept; - /** - * Check if we're at the end yet. - * @param other the end iterator to compare to. - */ - simdjson_really_inline bool operator!=(const iterator &other) const noexcept; - /** - * @private - * - * Gives the current index in the input document in bytes. - * - * document_stream stream = parser.parse_many(json,window); - * for(auto i = stream.begin(); i != stream.end(); ++i) { - * auto doc = *i; - * size_t index = i.current_index(); - * } - * - * This function (current_index()) is experimental and the usage - * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. - */ - simdjson_really_inline size_t current_index() const noexcept; - /** - * @private - * - * Gives a view of the current document. - * - * document_stream stream = parser.parse_many(json,window); - * for(auto i = stream.begin(); i != stream.end(); ++i) { - * auto doc = *i; - * std::string_view v = i->source(); - * } - * - * The returned string_view instance is simply a map to the (unparsed) - * source string: it may thus include white-space characters and all manner - * of padding. - * - * This function (source()) is experimental and the usage - * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. - */ - simdjson_really_inline std::string_view source() const noexcept; - - private: - simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; - /** The document_stream we're iterating through. */ - document_stream* stream; - /** Whether we're finished or not. */ - bool finished; - friend class document_stream; - }; - - /** - * Start iterating the documents in the stream. 
- */ - simdjson_really_inline iterator begin() noexcept; - /** - * The end of the stream, for iterator comparison purposes. - */ - simdjson_really_inline iterator end() noexcept; - -private: - - document_stream &operator=(const document_stream &) = delete; // Disallow copying - document_stream(const document_stream &other) = delete; // Disallow copying - - /** - * Construct a document_stream. Does not allocate or parse anything until the iterator is - * used. - * - * @param parser is a reference to the parser instance used to generate this document_stream - * @param buf is the raw byte buffer we need to process - * @param len is the length of the raw byte buffer in bytes - * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) - */ - simdjson_really_inline document_stream( - dom::parser &parser, - const uint8_t *buf, - size_t len, - size_t batch_size - ) noexcept; - - /** - * Parse the first document in the buffer. Used by begin(), to handle allocation and - * initialization. - */ - inline void start() noexcept; - - /** - * Parse the next document found in the buffer previously given to document_stream. - * - * The content should be a valid JSON document encoded as UTF-8. If there is a - * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are - * discouraged. - * - * You do NOT need to pre-allocate a parser. This function takes care of - * pre-allocating a capacity defined by the batch_size defined when creating the - * document_stream object. - * - * The function returns simdjson::EMPTY if there is no more data to be parsed. - * - * The function returns simdjson::SUCCESS (as integer = 0) in case of success - * and indicates that the buffer has successfully been parsed to the end. - * Every document it contained has been parsed without error. 
- * - * The function returns an error code from simdjson/simdjson.h in case of failure - * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; - * the simdjson::error_message function converts these error codes into a string). - * - * You can also check validity by calling parser.is_valid(). The same parser can - * and should be reused for the other documents in the buffer. - */ - inline void next() noexcept; - - /** - * Pass the next batch through stage 1 and return when finished. - * When threads are enabled, this may wait for the stage 1 thread to finish. - */ - inline void load_batch() noexcept; - - /** Get the next document index. */ - inline size_t next_batch_start() const noexcept; - - /** Pass the next batch through stage 1 with the given parser. */ - inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; - - dom::parser *parser; - const uint8_t *buf; - size_t len; - size_t batch_size; - /** The error (or lack thereof) from the current document. */ - error_code error; - size_t batch_start{0}; - size_t doc_index{}; -#ifdef SIMDJSON_THREADS_ENABLED - /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ - bool use_thread; - - inline void load_from_stage1_thread() noexcept; - - /** Start a thread to run stage 1 on the next batch. */ - inline void start_stage1_thread() noexcept; - - /** Wait for the stage 1 thread to finish and capture the results. */ - inline void finish_stage1_thread() noexcept; - - /** The error returned from the stage 1 thread. */ - error_code stage1_thread_error{UNINITIALIZED}; - /** The thread used to run stage 1 against the next batch in the background. */ - friend struct stage1_worker; - std::unique_ptr worker{new(std::nothrow) stage1_worker()}; - /** - * The parser used to run stage 1 in the background. Will be swapped - * with the regular parser when finished. 
- */ - dom::parser stage1_thread_parser{}; -#endif // SIMDJSON_THREADS_ENABLED - - friend class dom::parser; - friend struct simdjson_result; - friend struct internal::simdjson_result_base; - -}; // class document_stream - -} // namespace dom - -template<> -struct simdjson_result : public internal::simdjson_result_base { -public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline dom::document_stream::iterator begin() noexcept(false); - simdjson_really_inline dom::document_stream::iterator end() noexcept(false); -#else // SIMDJSON_EXCEPTIONS -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator begin() noexcept; - [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] - simdjson_really_inline dom::document_stream::iterator end() noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API -#endif // SIMDJSON_EXCEPTIONS -}; // struct simdjson_result - -} // namespace simdjson - -#endif // SIMDJSON_DOCUMENT_STREAM_H -/* end file include/simdjson/dom/document_stream.h */ -/* begin file include/simdjson/dom/element.h */ -#ifndef SIMDJSON_DOM_ELEMENT_H -#define SIMDJSON_DOM_ELEMENT_H - -#include - -namespace simdjson { -namespace internal { -template -class string_builder; -} -namespace dom { -class array; -class document; -class object; - -/** - * The actual concrete type of a JSON element - * This is the type it is most easily cast to with get<>. 
- */ -enum class element_type { - ARRAY = '[', ///< dom::array - OBJECT = '{', ///< dom::object - INT64 = 'l', ///< int64_t - UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t - DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. - STRING = '"', ///< std::string_view - BOOL = 't', ///< bool - NULL_VALUE = 'n' ///< null -}; - -/** - * A JSON element. - * - * References an element in a JSON document, representing a JSON null, boolean, string, number, - * array or object. - */ -class element { -public: - /** Create a new, invalid element. */ - simdjson_really_inline element() noexcept; - - /** The type of this element. */ - simdjson_really_inline element_type type() const noexcept; - - /** - * Cast this element to an array. - * - * @returns An object that can be used to iterate the array, or: - * INCORRECT_TYPE if the JSON element is not an array. - */ - inline simdjson_result get_array() const noexcept; - /** - * Cast this element to an object. - * - * @returns An object that can be used to look up or iterate the object's fields, or: - * INCORRECT_TYPE if the JSON element is not an object. - */ - inline simdjson_result get_object() const noexcept; - /** - * Cast this element to a null-terminated C string. - * - * The string is guaranteed to be valid UTF-8. - * - * The length of the string is given by get_string_length(). Because JSON strings - * may contain null characters, it may be incorrect to use strlen to determine the - * string length. - * - * It is possible to get a single string_view instance which represents both the string - * content and its length: see get_string(). - * - * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will - * be invalidated the next time it parses a document or when it is destroyed. - * Returns INCORRECT_TYPE if the JSON element is not a string. 
- */ - inline simdjson_result get_c_str() const noexcept; - /** - * Gives the length in bytes of the string. - * - * It is possible to get a single string_view instance which represents both the string - * content and its length: see get_string(). - * - * @returns A string length in bytes. - * Returns INCORRECT_TYPE if the JSON element is not a string. - */ - inline simdjson_result get_string_length() const noexcept; - /** - * Cast this element to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next time it - * parses a document or when it is destroyed. - * Returns INCORRECT_TYPE if the JSON element is not a string. - */ - inline simdjson_result get_string() const noexcept; - /** - * Cast this element to a signed integer. - * - * @returns A signed 64-bit integer. - * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE - * if it is negative. - */ - inline simdjson_result get_int64() const noexcept; - /** - * Cast this element to an unsigned integer. - * - * @returns An unsigned 64-bit integer. - * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE - * if it is too large. - */ - inline simdjson_result get_uint64() const noexcept; - /** - * Cast this element to a double floating-point. - * - * @returns A double value. - * Returns INCORRECT_TYPE if the JSON element is not a number. - */ - inline simdjson_result get_double() const noexcept; - /** - * Cast this element to a bool. - * - * @returns A bool value. - * Returns INCORRECT_TYPE if the JSON element is not a boolean. - */ - inline simdjson_result get_bool() const noexcept; - - /** - * Whether this element is a json array. - * - * Equivalent to is(). - */ - inline bool is_array() const noexcept; - /** - * Whether this element is a json object. - * - * Equivalent to is(). 
- */ - inline bool is_object() const noexcept; - /** - * Whether this element is a json string. - * - * Equivalent to is() or is(). - */ - inline bool is_string() const noexcept; - /** - * Whether this element is a json number that fits in a signed 64-bit integer. - * - * Equivalent to is(). - */ - inline bool is_int64() const noexcept; - /** - * Whether this element is a json number that fits in an unsigned 64-bit integer. - * - * Equivalent to is(). - */ - inline bool is_uint64() const noexcept; - /** - * Whether this element is a json number that fits in a double. - * - * Equivalent to is(). - */ - inline bool is_double() const noexcept; - - /** - * Whether this element is a json number. - * - * Both integers and floating points will return true. - */ - inline bool is_number() const noexcept; - - /** - * Whether this element is a json `true` or `false`. - * - * Equivalent to is(). - */ - inline bool is_bool() const noexcept; - /** - * Whether this element is a json `null`. - */ - inline bool is_null() const noexcept; - - /** - * Tell whether the value can be cast to provided type (T). - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: dom::array - * - Object: dom::object - * - * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object - */ - template - simdjson_really_inline bool is() const noexcept; - - /** - * Get the value as the provided type (T). - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: dom::array - * - Object: dom::object - * - * You may use get_double(), get_bool(), get_uint64(), get_int64(), - * get_object(), get_array() or get_string() instead. 
- * - * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object - * - * @returns The value cast to the given type, or: - * INCORRECT_TYPE if the value cannot be cast to the given type. - */ - - template - inline simdjson_result get() const noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); - } - - /** - * Get the value as the provided type (T). - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: dom::array - * - Object: dom::object - * - * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object - * - * @param value The variable to set to the value. May not be set if there is an error. - * - * @returns The error that occurred, or SUCCESS if there was no error. - */ - template - simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; - - /** - * Get the value as the provided type (T), setting error if it's not the given type. - * - * Supported types: - * - Boolean: bool - * - Number: double, uint64_t, int64_t - * - String: std::string_view, const char * - * - Array: dom::array - * - Object: dom::object - * - * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object - * - * @param value The variable to set to the given type. value is undefined if there is an error. - * @param error The variable to store the error. error is set to error_code::SUCCEED if there is an error. - */ - template - inline void tie(T &value, error_code &error) && noexcept; - -#if SIMDJSON_EXCEPTIONS - /** - * Read this element as a boolean. - * - * @return The boolean value - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. 
- */ - inline operator bool() const noexcept(false); - - /** - * Read this element as a null-terminated UTF-8 string. - * - * Be mindful that JSON allows strings to contain null characters. - * - * Does *not* convert other types to a string; requires that the JSON type of the element was - * an actual string. - * - * @return The string value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. - */ - inline explicit operator const char*() const noexcept(false); - - /** - * Read this element as a null-terminated UTF-8 string. - * - * Does *not* convert other types to a string; requires that the JSON type of the element was - * an actual string. - * - * @return The string value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. - */ - inline operator std::string_view() const noexcept(false); - - /** - * Read this element as an unsigned integer. - * - * @return The integer value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative - */ - inline operator uint64_t() const noexcept(false); - /** - * Read this element as an signed integer. - * - * @return The integer value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits - */ - inline operator int64_t() const noexcept(false); - /** - * Read this element as an double. - * - * @return The double value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number - * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative - */ - inline operator double() const noexcept(false); - /** - * Read this element as a JSON array. - * - * @return The JSON array. 
- * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline operator array() const noexcept(false); - /** - * Read this element as a JSON object (key/value pairs). - * - * @return The JSON object. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object - */ - inline operator object() const noexcept(false); - - /** - * Iterate over each element in this array. - * - * @return The beginning of the iteration. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline dom::array::iterator begin() const noexcept(false); - - /** - * Iterate over each element in this array. - * - * @return The end of the iteration. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array - */ - inline dom::array::iterator end() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result operator[](std::string_view key) const noexcept; - - /** - * Get the value associated with the given key. 
- * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result operator[](const char *key) const noexcept; - - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard. - * - * dom::parser parser; - * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); - * doc.at_pointer("/foo/a/1") == 20 - * doc.at_pointer("/foo")["a"].at(1) == 20 - * doc.at_pointer("")["foo"]["a"].at(1) == 20 - * - * It is allowed for a key to be the empty string: - * - * dom::parser parser; - * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); - * obj.at_pointer("//a/1") == 20 - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - /** - * - * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard - * and allowed the following : - * - * dom::parser parser; - * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); - * doc.at("foo/a/1") == 20 - * - * Though it is intuitive, it is not compliant with RFC 6901 - * https://tools.ietf.org/html/rfc6901 - * - * For standard compliance, use the at_pointer function instead. 
- * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] - inline simdjson_result at(const std::string_view json_pointer) const noexcept; -#endif // SIMDJSON_DISABLE_DEPRECATED_API - - /** - * Get the value at the given index. - * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - inline simdjson_result at(size_t index) const noexcept; - - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result at_key(std::string_view key) const noexcept; - - /** - * Get the value associated with the given key in a case-insensitive manner. - * - * Note: The key will be matched against **unescaped** JSON. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; - - /** @private for debugging. Prints out the root element. 
*/ - inline bool dump_raw_tape(std::ostream &out) const noexcept; - -private: - simdjson_really_inline element(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; - friend class document; - friend class object; - friend class array; - friend struct simdjson_result; - template - friend class simdjson::internal::string_builder; - -}; - -} // namespace dom - -/** The result of a JSON navigation that may fail. */ -template<> -struct simdjson_result : public internal::simdjson_result_base { -public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::element &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result type() const noexcept; - template - simdjson_really_inline bool is() const noexcept; - template - simdjson_really_inline simdjson_result get() const noexcept; - template - simdjson_warn_unused simdjson_really_inline error_code get(T &value) const noexcept; - - simdjson_really_inline simdjson_result get_array() const noexcept; - simdjson_really_inline simdjson_result get_object() const noexcept; - simdjson_really_inline simdjson_result get_c_str() const noexcept; - simdjson_really_inline simdjson_result get_string_length() const noexcept; - simdjson_really_inline simdjson_result get_string() const noexcept; - simdjson_really_inline simdjson_result get_int64() const noexcept; - simdjson_really_inline simdjson_result get_uint64() const noexcept; - simdjson_really_inline simdjson_result get_double() const noexcept; - simdjson_really_inline simdjson_result get_bool() const noexcept; - - simdjson_really_inline bool is_array() const noexcept; - simdjson_really_inline bool is_object() const noexcept; - simdjson_really_inline bool is_string() const noexcept; - simdjson_really_inline bool is_int64() const noexcept; - simdjson_really_inline bool is_uint64() const noexcept; - 
simdjson_really_inline bool is_double() const noexcept; - simdjson_really_inline bool is_number() const noexcept; - simdjson_really_inline bool is_bool() const noexcept; - simdjson_really_inline bool is_null() const noexcept; - - simdjson_really_inline simdjson_result operator[](std::string_view key) const noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) const noexcept; - simdjson_really_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; - [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] - simdjson_really_inline simdjson_result at(const std::string_view json_pointer) const noexcept; - simdjson_really_inline simdjson_result at(size_t index) const noexcept; - simdjson_really_inline simdjson_result at_key(std::string_view key) const noexcept; - simdjson_really_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator bool() const noexcept(false); - simdjson_really_inline explicit operator const char*() const noexcept(false); - simdjson_really_inline operator std::string_view() const noexcept(false); - simdjson_really_inline operator uint64_t() const noexcept(false); - simdjson_really_inline operator int64_t() const noexcept(false); - simdjson_really_inline operator double() const noexcept(false); - simdjson_really_inline operator dom::array() const noexcept(false); - simdjson_really_inline operator dom::object() const noexcept(false); - - simdjson_really_inline dom::array::iterator begin() const noexcept(false); - simdjson_really_inline dom::array::iterator end() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS -}; - - -} // namespace simdjson - -#endif // SIMDJSON_DOM_DOCUMENT_H -/* end file include/simdjson/dom/element.h */ -/* begin file include/simdjson/dom/object.h */ -#ifndef SIMDJSON_DOM_OBJECT_H -#define SIMDJSON_DOM_OBJECT_H - - 
-namespace simdjson { -namespace internal { -template -class string_builder; -} -namespace dom { - -class document; -class element; -class key_value_pair; - -/** - * JSON object. - */ -class object { -public: - /** Create a new, invalid object */ - simdjson_really_inline object() noexcept; - - class iterator { - public: - using value_type = key_value_pair; - using difference_type = std::ptrdiff_t; - - /** - * Get the actual key/value pair - */ - inline const value_type operator*() const noexcept; - /** - * Get the next key/value pair. - * - * Part of the std::iterator interface. - * - */ - inline iterator& operator++() noexcept; - /** - * Get the next key/value pair. - * - * Part of the std::iterator interface. - * - */ - inline iterator operator++(int) noexcept; - /** - * Check if these values come from the same place in the JSON. - * - * Part of the std::iterator interface. - */ - inline bool operator!=(const iterator& other) const noexcept; - inline bool operator==(const iterator& other) const noexcept; - - inline bool operator<(const iterator& other) const noexcept; - inline bool operator<=(const iterator& other) const noexcept; - inline bool operator>=(const iterator& other) const noexcept; - inline bool operator>(const iterator& other) const noexcept; - /** - * Get the key of this key/value pair. - */ - inline std::string_view key() const noexcept; - /** - * Get the length (in bytes) of the key in this key/value pair. - * You should expect this function to be faster than key().size(). - */ - inline uint32_t key_length() const noexcept; - /** - * Returns true if the key in this key/value pair is equal - * to the provided string_view. - */ - inline bool key_equals(std::string_view o) const noexcept; - /** - * Returns true if the key in this key/value pair is equal - * to the provided string_view in a case-insensitive manner. - * Case comparisons may only be handled correctly for ASCII strings. 
- */ - inline bool key_equals_case_insensitive(std::string_view o) const noexcept; - /** - * Get the key of this key/value pair. - */ - inline const char *key_c_str() const noexcept; - /** - * Get the value of this key/value pair. - */ - inline element value() const noexcept; - - iterator() noexcept = default; - iterator(const iterator&) noexcept = default; - iterator& operator=(const iterator&) noexcept = default; - private: - simdjson_really_inline iterator(const internal::tape_ref &tape) noexcept; - - internal::tape_ref tape; - - friend class object; - }; - - /** - * Return the first key/value pair. - * - * Part of the std::iterable interface. - */ - inline iterator begin() const noexcept; - /** - * One past the last key/value pair. - * - * Part of the std::iterable interface. - */ - inline iterator end() const noexcept; - /** - * Get the size of the object (number of keys). - * It is a saturated value with a maximum of 0xFFFFFF: if the value - * is 0xFFFFFF then the size is 0xFFFFFF or greater. - */ - inline size_t size() const noexcept; - /** - * Get the value associated with the given key. - * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * This function has linear-time complexity: the keys are checked one by one. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result operator[](std::string_view key) const noexcept; - - /** - * Get the value associated with the given key. 
- * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * This function has linear-time complexity: the keys are checked one by one. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - * - INCORRECT_TYPE if this is not an object - */ - inline simdjson_result operator[](const char *key) const noexcept; - - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node - * as the root of its own JSON document. - * - * dom::parser parser; - * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); - * obj.at_pointer("/foo/a/1") == 20 - * obj.at_pointer("/foo")["a"].at(1) == 20 - * - * It is allowed for a key to be the empty string: - * - * dom::parser parser; - * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); - * obj.at_pointer("//a/1") == 20 - * obj.at_pointer("/")["a"].at(1) == 20 - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; - - /** - * Get the value associated with the given key. 
- * - * The key will be matched against **unescaped** JSON: - * - * dom::parser parser; - * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 - * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD - * - * This function has linear-time complexity: the keys are checked one by one. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result at_key(std::string_view key) const noexcept; - - /** - * Get the value associated with the given key in a case-insensitive manner. - * It is only guaranteed to work over ASCII inputs. - * - * Note: The key will be matched against **unescaped** JSON. - * - * This function has linear-time complexity: the keys are checked one by one. - * - * @return The value associated with this field, or: - * - NO_SUCH_FIELD if the field does not exist in the object - */ - inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; - -private: - simdjson_really_inline object(const internal::tape_ref &tape) noexcept; - - internal::tape_ref tape; - - friend class element; - friend struct simdjson_result; - template - friend class simdjson::internal::string_builder; -}; - -/** - * Key/value pair in an object. - */ -class key_value_pair { -public: - /** key in the key-value pair **/ - std::string_view key; - /** value in the key-value pair **/ - element value; - -private: - simdjson_really_inline key_value_pair(std::string_view _key, element _value) noexcept; - friend class object; -}; - -} // namespace dom - -/** The result of a JSON conversion that may fail. 
*/ -template<> -struct simdjson_result : public internal::simdjson_result_base { -public: - simdjson_really_inline simdjson_result() noexcept; ///< @private - simdjson_really_inline simdjson_result(dom::object value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - inline simdjson_result operator[](std::string_view key) const noexcept; - inline simdjson_result operator[](const char *key) const noexcept; - inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; - inline simdjson_result at_key(std::string_view key) const noexcept; - inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; - -#if SIMDJSON_EXCEPTIONS - inline dom::object::iterator begin() const noexcept(false); - inline dom::object::iterator end() const noexcept(false); - inline size_t size() const noexcept(false); -#endif // SIMDJSON_EXCEPTIONS -}; - -} // namespace simdjson - -#if defined(__cpp_lib_ranges) -#include - -namespace std { -namespace ranges { -template<> -inline constexpr bool enable_view = true; -#if SIMDJSON_EXCEPTIONS -template<> -inline constexpr bool enable_view> = true; -#endif // SIMDJSON_EXCEPTIONS -} // namespace ranges -} // namespace std -#endif // defined(__cpp_lib_ranges) - -#endif // SIMDJSON_DOM_OBJECT_H -/* end file include/simdjson/dom/object.h */ -/* begin file include/simdjson/dom/serialization.h */ -#ifndef SIMDJSON_SERIALIZATION_H -#define SIMDJSON_SERIALIZATION_H - -#include - -namespace simdjson { - -/** - * The string_builder template and mini_formatter class - * are not part of our public API and are subject to change - * at any time! - */ -namespace internal { - -class mini_formatter; - -/** - * @private The string_builder template allows us to construct - * a string from a document element. It is parametrized - * by a "formatter" which handles the details. 
Thus - * the string_builder template could support both minification - * and prettification, and various other tradeoffs. - */ -template -class string_builder { -public: - /** Construct an initially empty builder, would print the empty string **/ - string_builder() = default; - /** Append an element to the builder (to be printed) **/ - inline void append(simdjson::dom::element value); - /** Append an array to the builder (to be printed) **/ - inline void append(simdjson::dom::array value); - /** Append an object to the builder (to be printed) **/ - inline void append(simdjson::dom::object value); - /** Reset the builder (so that it would print the empty string) **/ - simdjson_really_inline void clear(); - /** - * Get access to the string. The string_view is owned by the builder - * and it is invalid to use it after the string_builder has been - * destroyed. - * However you can make a copy of the string_view on memory that you - * own. - */ - simdjson_really_inline std::string_view str() const; - /** Append a key_value_pair to the builder (to be printed) **/ - simdjson_really_inline void append(simdjson::dom::key_value_pair value); -private: - formatter format{}; -}; - -/** - * @private This is the class that we expect to use with the string_builder - * template. It tries to produce a compact version of the JSON element - * as quickly as possible. 
- */ -class mini_formatter { -public: - mini_formatter() = default; - /** Add a comma **/ - simdjson_really_inline void comma(); - /** Start an array, prints [ **/ - simdjson_really_inline void start_array(); - /** End an array, prints ] **/ - simdjson_really_inline void end_array(); - /** Start an array, prints { **/ - simdjson_really_inline void start_object(); - /** Start an array, prints } **/ - simdjson_really_inline void end_object(); - /** Prints a true **/ - simdjson_really_inline void true_atom(); - /** Prints a false **/ - simdjson_really_inline void false_atom(); - /** Prints a null **/ - simdjson_really_inline void null_atom(); - /** Prints a number **/ - simdjson_really_inline void number(int64_t x); - /** Prints a number **/ - simdjson_really_inline void number(uint64_t x); - /** Prints a number **/ - simdjson_really_inline void number(double x); - /** Prints a key (string + colon) **/ - simdjson_really_inline void key(std::string_view unescaped); - /** Prints a string. The string is escaped as needed. **/ - simdjson_really_inline void string(std::string_view unescaped); - /** Clears out the content. **/ - simdjson_really_inline void clear(); - /** - * Get access to the buffer, it is owned by the instance, but - * the user can make a copy. - **/ - simdjson_really_inline std::string_view str() const; - -private: - // implementation details (subject to change) - /** Prints one character **/ - simdjson_really_inline void one_char(char c); - /** Backing buffer **/ - std::vector buffer{}; // not ideal! -}; - -} // internal - -namespace dom { - -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The element. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
- */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The object. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#endif -} // namespace dom - -/** - * Converts JSON to a string. 
- * - * dom::parser parser; - * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); - * cout << to_string(doc) << endl; // prints [1,2,3] - * - */ -template -std::string to_string(T x) { - // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ - // Currently minify and to_string are identical but in the future, they may - // differ. - simdjson::internal::string_builder<> sb; - sb.append(x); - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); -} -#if SIMDJSON_EXCEPTIONS -template -std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} -#endif - -/** - * Minifies a JSON element or document, printing the smallest possible valid JSON. - * - * dom::parser parser; - * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); - * cout << minify(doc) << endl; // prints [1,2,3] - * - */ -template -std::string minify(T x) { - return to_string(x); -} - -#if SIMDJSON_EXCEPTIONS -template -std::string minify(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); -} -#endif - - -} // namespace simdjson - - -#endif -/* end file include/simdjson/dom/serialization.h */ - -// Deprecated API -/* begin file include/simdjson/dom/jsonparser.h */ -// TODO Remove this -- deprecated API and files - -#ifndef SIMDJSON_DOM_JSONPARSER_H -#define SIMDJSON_DOM_JSONPARSER_H - -/* begin file include/simdjson/dom/parsedjson.h */ -// TODO Remove this -- deprecated API and files - -#ifndef SIMDJSON_DOM_PARSEDJSON_H -#define SIMDJSON_DOM_PARSEDJSON_H - - -namespace simdjson { - -/** - * @deprecated Use `dom::parser` instead. 
- */ -using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; - -} // namespace simdjson - -#endif // SIMDJSON_DOM_PARSEDJSON_H -/* end file include/simdjson/dom/parsedjson.h */ -/* begin file include/simdjson/jsonioutil.h */ -#ifndef SIMDJSON_JSONIOUTIL_H -#define SIMDJSON_JSONIOUTIL_H - - -namespace simdjson { - -#if SIMDJSON_EXCEPTIONS -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -[[deprecated("Use padded_string::load() instead")]] -inline padded_string get_corpus(const char *path) { - return padded_string::load(path); -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API -#endif // SIMDJSON_EXCEPTIONS - -} // namespace simdjson - -#endif // SIMDJSON_JSONIOUTIL_H -/* end file include/simdjson/jsonioutil.h */ - -namespace simdjson { - -// -// C API (json_parse and build_parsed_json) declarations -// - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. 
- // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { - error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} -[[deprecated("Use parser.parse() instead")]] -inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { - error_code code = parser.parse(s).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return code; -} - -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. 
- parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(buf, len, realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { - dom::parser parser; - error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. - parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -[[deprecated("Use parser.parse() instead")]] -simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { - dom::parser parser; - error_code code = parser.parse(s).error(); - // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid - // bits in the parser instead of heeding the result code. The normal parser unsets those in - // anticipation of making the error code ephemeral. - // Here we put the code back into the parser, until we've removed this method. 
- parser.valid = code == SUCCESS; - parser.error = code; - return parser; -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API - -/** @private We do not want to allow implicit conversion from C string to std::string. */ -int json_parse(const char *buf, dom::parser &parser) noexcept = delete; -/** @private We do not want to allow implicit conversion from C string to std::string. */ -dom::parser build_parsed_json(const char *buf) noexcept = delete; - -} // namespace simdjson - -#endif // SIMDJSON_DOM_JSONPARSER_H -/* end file include/simdjson/dom/jsonparser.h */ -/* begin file include/simdjson/dom/parsedjson_iterator.h */ -// TODO Remove this -- deprecated API and files - -#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H -#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H - -#include -#include -#include -#include -#include -#include - -/* begin file include/simdjson/internal/jsonformatutils.h */ -#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H -#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H - -#include -#include -#include - -namespace simdjson { -namespace internal { - -class escape_json_string; - -inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); - -class escape_json_string { -public: - escape_json_string(std::string_view _str) noexcept : str{_str} {} - operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } -private: - std::string_view str; - friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); -}; - -inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { - for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { - // TODO can this be done once at the beginning, or will it mess up << char? 
- std::ios::fmtflags f(out.flags()); - out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); - out.flags(f); - } else { - out << unescaped.str[i]; - } - } - } - return out; -} - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H -/* end file include/simdjson/internal/jsonformatutils.h */ - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - -namespace simdjson { -/** @private **/ -class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { -public: - inline Iterator(const dom::parser &parser) noexcept(false); - inline Iterator(const Iterator &o) noexcept; - inline ~Iterator() noexcept; - - inline Iterator& operator=(const Iterator&) = delete; - - inline bool is_ok() const; - - // useful for debugging purposes - inline size_t get_tape_location() const; - - // useful for debugging purposes - inline size_t get_tape_length() const; - - // returns the current depth (start at 1 with 0 reserved for the fictitious - // root node) - inline size_t get_depth() const; - - // A scope is a series of nodes at the same depth, typically it is either an - // object ({) or an array ([). The root node has type 'r'. - inline uint8_t get_scope_type() const; - - // move forward in document order - inline bool move_forward(); - - // retrieve the character code of what we're looking at: - // [{"slutfn are the possibilities - inline uint8_t get_type() const { - return current_type; // short functions should be inlined! 
- } - - // get the int64_t value at this node; valid only if get_type is "l" - inline int64_t get_integer() const { - if (location + 1 >= tape_length) { - return 0; // default value in case of error - } - return static_cast(doc.tape[location + 1]); - } - - // get the value as uint64; valid only if if get_type is "u" - inline uint64_t get_unsigned_integer() const { - if (location + 1 >= tape_length) { - return 0; // default value in case of error - } - return doc.tape[location + 1]; - } - - // get the string value at this node (NULL ended); valid only if get_type is " - // note that tabs, and line endings are escaped in the returned value (see - // print_with_escapes) return value is valid UTF-8, it may contain NULL chars - // within the string: get_string_length determines the true string length. - inline const char *get_string() const { - return reinterpret_cast( - doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); - } - - // return the length of the string in bytes - inline uint32_t get_string_length() const { - uint32_t answer; - std::memcpy(&answer, - reinterpret_cast(doc.string_buf.get() + - (current_val & internal::JSON_VALUE_MASK)), - sizeof(uint32_t)); - return answer; - } - - // get the double value at this node; valid only if - // get_type() is "d" - inline double get_double() const { - if (location + 1 >= tape_length) { - return std::numeric_limits::quiet_NaN(); // default value in - // case of error - } - double answer; - std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); - return answer; - } - - inline bool is_object_or_array() const { return is_object() || is_array(); } - - inline bool is_object() const { return get_type() == '{'; } - - inline bool is_array() const { return get_type() == '['; } - - inline bool is_string() const { return get_type() == '"'; } - - // Returns true if the current type of the node is an signed integer. - // You can get its value with `get_integer()`. 
- inline bool is_integer() const { return get_type() == 'l'; } - - // Returns true if the current type of the node is an unsigned integer. - // You can get its value with `get_unsigned_integer()`. - // - // NOTE: - // Only a large value, which is out of range of a 64-bit signed integer, is - // represented internally as an unsigned node. On the other hand, a typical - // positive integer, such as 1, 42, or 1000000, is as a signed node. - // Be aware this function returns false for a signed node. - inline bool is_unsigned_integer() const { return get_type() == 'u'; } - // Returns true if the current type of the node is a double floating-point number. - inline bool is_double() const { return get_type() == 'd'; } - // Returns true if the current type of the node is a number (integer or floating-point). - inline bool is_number() const { - return is_integer() || is_unsigned_integer() || is_double(); - } - // Returns true if the current type of the node is a bool with true value. - inline bool is_true() const { return get_type() == 't'; } - // Returns true if the current type of the node is a bool with false value. - inline bool is_false() const { return get_type() == 'f'; } - // Returns true if the current type of the node is null. - inline bool is_null() const { return get_type() == 'n'; } - // Returns true if the type byte represents an object of an array - static bool is_object_or_array(uint8_t type) { - return ((type == '[') || (type == '{')); - } - - // when at {, go one level deep, looking for a given key - // if successful, we are left pointing at the value, - // if not, we are still pointing at the object ({) - // (in case of repeated keys, this only finds the first one). - // We seek the key using C's strcmp so if your JSON strings contain - // NULL chars, this would trigger a false positive: if you expect that - // to be the case, take extra precautions. 
- // Furthermore, we do the comparison character-by-character - // without taking into account Unicode equivalence. - inline bool move_to_key(const char *key); - - // as above, but case insensitive lookup (strcmpi instead of strcmp) - inline bool move_to_key_insensitive(const char *key); - - // when at {, go one level deep, looking for a given key - // if successful, we are left pointing at the value, - // if not, we are still pointing at the object ({) - // (in case of repeated keys, this only finds the first one). - // The string we search for can contain NULL values. - // Furthermore, we do the comparison character-by-character - // without taking into account Unicode equivalence. - inline bool move_to_key(const char *key, uint32_t length); - - // when at a key location within an object, this moves to the accompanying - // value (located next to it). This is equivalent but much faster than - // calling "next()". - inline void move_to_value(); - - // when at [, go one level deep, and advance to the given index. - // if successful, we are left pointing at the value, - // if not, we are still pointing at the array ([) - inline bool move_to_index(uint32_t index); - - // Moves the iterator to the value corresponding to the json pointer. - // Always search from the root of the document. - // if successful, we are left pointing at the value, - // if not, we are still pointing the same value we were pointing before the - // call. The json pointer follows the rfc6901 standard's syntax: - // https://tools.ietf.org/html/rfc6901 However, the standard says "If a - // referenced member name is not unique in an object, the member that is - // referenced is undefined, and evaluation fails". Here we just return the - // first corresponding value. The length parameter is the length of the - // jsonpointer string ('pointer'). - inline bool move_to(const char *pointer, uint32_t length); - - // Moves the iterator to the value corresponding to the json pointer. 
- // Always search from the root of the document. - // if successful, we are left pointing at the value, - // if not, we are still pointing the same value we were pointing before the - // call. The json pointer implementation follows the rfc6901 standard's - // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says - // "If a referenced member name is not unique in an object, the member that - // is referenced is undefined, and evaluation fails". Here we just return - // the first corresponding value. - inline bool move_to(const std::string &pointer) { - return move_to(pointer.c_str(), uint32_t(pointer.length())); - } - - private: - // Almost the same as move_to(), except it searches from the current - // position. The pointer's syntax is identical, though that case is not - // handled by the rfc6901 standard. The '/' is still required at the - // beginning. However, contrary to move_to(), the URI Fragment Identifier - // Representation is not supported here. Also, in case of failure, we are - // left pointing at the closest value it could reach. For these reasons it - // is private. It exists because it is used by move_to(). - inline bool relative_move_to(const char *pointer, uint32_t length); - - public: - // throughout return true if we can do the navigation, false - // otherwise - - // Within a given scope (series of nodes at the same depth within either an - // array or an object), we move forward. - // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { - // and [. At the object ({) or at the array ([), you can issue a "down" to - // visit their content. valid if we're not at the end of a scope (returns - // true). - inline bool next(); - - // Within a given scope (series of nodes at the same depth within either an - // array or an object), we move backward. - // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true - // when starting at the end of the scope. 
At the object ({) or at the array - // ([), you can issue a "down" to visit their content. - // Performance warning: This function is implemented by starting again - // from the beginning of the scope and scanning forward. You should expect - // it to be relatively slow. - inline bool prev(); - - // Moves back to either the containing array or object (type { or [) from - // within a contained scope. - // Valid unless we are at the first level of the document - inline bool up(); - - // Valid if we're at a [ or { and it starts a non-empty scope; moves us to - // start of that deeper scope if it not empty. Thus, given [true, null, - // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. - inline bool down(); - - // move us to the start of our current scope, - // a scope is a series of nodes at the same level - inline void to_start_scope(); - - inline void rewind() { - while (up()) - ; - } - - - - // print the node we are currently pointing at - inline bool print(std::ostream &os, bool escape_strings = true) const; - - private: - const document &doc; - size_t max_depth{}; - size_t depth{}; - size_t location{}; // our current location on a tape - size_t tape_length{}; - uint8_t current_type{}; - uint64_t current_val{}; - typedef struct { - size_t start_of_scope; - uint8_t scope_type; - } scopeindex_t; - - scopeindex_t *depth_index{}; -}; - -} // namespace simdjson -#endif // SIMDJSON_DISABLE_DEPRECATED_API - -#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H -/* end file include/simdjson/dom/parsedjson_iterator.h */ - -// Inline functions -/* begin file include/simdjson/dom/array-inl.h */ -#ifndef SIMDJSON_INLINE_ARRAY_H -#define SIMDJSON_INLINE_ARRAY_H - -// Inline implementations go in here. 
- -#include - -namespace simdjson { - -// -// simdjson_result inline implementation -// -simdjson_really_inline simdjson_result::simdjson_result() noexcept - : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::array value) noexcept - : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base(error) {} - -#if SIMDJSON_EXCEPTIONS - -inline dom::array::iterator simdjson_result::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); -} -inline dom::array::iterator simdjson_result::end() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); -} -inline size_t simdjson_result::size() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.size(); -} - -#endif // SIMDJSON_EXCEPTIONS - -inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} -inline simdjson_result simdjson_result::at(size_t index) const noexcept { - if (error()) { return error(); } - return first.at(index); -} - -namespace dom { - -// -// array inline implementation -// -simdjson_really_inline array::array() noexcept : tape{} {} -simdjson_really_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} -inline array::iterator array::begin() const noexcept { - return internal::tape_ref(tape.doc, tape.json_index + 1); -} -inline array::iterator array::end() const noexcept { - return internal::tape_ref(tape.doc, tape.after_element() - 1); -} -inline size_t array::size() const noexcept { - return tape.scope_count(); -} -inline size_t array::number_of_slots() const noexcept { - return tape.matching_brace_index() - tape.json_index; -} -inline simdjson_result 
array::at_pointer(std::string_view json_pointer) const noexcept { - if(json_pointer.empty()) { // an empty string means that we return the current node - return element(this->tape); // copy the current node - } else if(json_pointer[0] != '/') { // otherwise there is an error - return INVALID_JSON_POINTER; - } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } - - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; - } - - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" - - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - - // Get the child - auto child = array(tape).at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; - } - // If there is a /, we're not done yet, call recursively. 
- if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); - } - return child; -} - -inline simdjson_result array::at(size_t index) const noexcept { - size_t i=0; - for (auto element : *this) { - if (i == index) { return element; } - i++; - } - return INDEX_OUT_OF_BOUNDS; -} - -// -// array::iterator inline implementation -// -simdjson_really_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } -inline element array::iterator::operator*() const noexcept { - return element(tape); -} -inline array::iterator& array::iterator::operator++() noexcept { - tape.json_index = tape.after_element(); - return *this; -} -inline array::iterator array::iterator::operator++(int) noexcept { - array::iterator out = *this; - ++*this; - return out; -} -inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { - return tape.json_index != other.tape.json_index; -} -inline bool array::iterator::operator==(const array::iterator& other) const noexcept { - return tape.json_index == other.tape.json_index; -} -inline bool array::iterator::operator<(const array::iterator& other) const noexcept { - return tape.json_index < other.tape.json_index; -} -inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { - return tape.json_index <= other.tape.json_index; -} -inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { - return tape.json_index >= other.tape.json_index; -} -inline bool array::iterator::operator>(const array::iterator& other) const noexcept { - return tape.json_index > other.tape.json_index; -} - -} // namespace dom - - -} // namespace simdjson - -/* begin file include/simdjson/dom/element-inl.h */ -#ifndef SIMDJSON_INLINE_ELEMENT_H -#define SIMDJSON_INLINE_ELEMENT_H - -#include -#include - -namespace simdjson { - -// -// simdjson_result inline implementation -// -simdjson_really_inline simdjson_result::simdjson_result() noexcept 
- : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::element &&value) noexcept - : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base(error) {} -inline simdjson_result simdjson_result::type() const noexcept { - if (error()) { return error(); } - return first.type(); -} - -template -simdjson_really_inline bool simdjson_result::is() const noexcept { - return !error() && first.is(); -} -template -simdjson_really_inline simdjson_result simdjson_result::get() const noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_warn_unused simdjson_really_inline error_code simdjson_result::get(T &value) const noexcept { - if (error()) { return error(); } - return first.get(value); -} - -simdjson_really_inline simdjson_result simdjson_result::get_array() const noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() const noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_c_str() const noexcept { - if (error()) { return error(); } - return first.get_c_str(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string_length() const noexcept { - if (error()) { return error(); } - return first.get_string_length(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() const noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() const noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() const noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_really_inline 
simdjson_result simdjson_result::get_double() const noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() const noexcept { - if (error()) { return error(); } - return first.get_bool(); -} - -simdjson_really_inline bool simdjson_result::is_array() const noexcept { - return !error() && first.is_array(); -} -simdjson_really_inline bool simdjson_result::is_object() const noexcept { - return !error() && first.is_object(); -} -simdjson_really_inline bool simdjson_result::is_string() const noexcept { - return !error() && first.is_string(); -} -simdjson_really_inline bool simdjson_result::is_int64() const noexcept { - return !error() && first.is_int64(); -} -simdjson_really_inline bool simdjson_result::is_uint64() const noexcept { - return !error() && first.is_uint64(); -} -simdjson_really_inline bool simdjson_result::is_double() const noexcept { - return !error() && first.is_double(); -} -simdjson_really_inline bool simdjson_result::is_number() const noexcept { - return !error() && first.is_number(); -} -simdjson_really_inline bool simdjson_result::is_bool() const noexcept { - return !error() && first.is_bool(); -} - -simdjson_really_inline bool simdjson_result::is_null() const noexcept { - return !error() && first.is_null(); -} - -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with 
a slash '/', see RFC6901 ")]] -simdjson_really_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_DEPRECATED_WARNING - if (error()) { return error(); } - return first.at(json_pointer); -SIMDJSON_POP_DISABLE_WARNINGS -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) const noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_really_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { - if (error()) { return error(); } - return first.at_key(key); -} -simdjson_really_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { - if (error()) { return error(); } - return first.at_key_case_insensitive(key); -} - -#if SIMDJSON_EXCEPTIONS - -simdjson_really_inline simdjson_result::operator bool() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator const char *() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator std::string_view() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator uint64_t() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator int64_t() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator double() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator dom::array() const noexcept(false) { - return get(); -} -simdjson_really_inline simdjson_result::operator dom::object() const noexcept(false) { - return get(); -} - -simdjson_really_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); -} -simdjson_really_inline dom::array::iterator simdjson_result::end() 
const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); -} - -#endif // SIMDJSON_EXCEPTIONS - -namespace dom { - -// -// element inline implementation -// -simdjson_really_inline element::element() noexcept : tape{} {} -simdjson_really_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } - -inline element_type element::type() const noexcept { - auto tape_type = tape.tape_ref_type(); - return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast(tape_type); -} - -inline simdjson_result element::get_bool() const noexcept { - if(tape.is_true()) { - return true; - } else if(tape.is_false()) { - return false; - } - return INCORRECT_TYPE; -} -inline simdjson_result element::get_c_str() const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::STRING: { - return tape.get_c_str(); - } - default: - return INCORRECT_TYPE; - } -} -inline simdjson_result element::get_string_length() const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::STRING: { - return tape.get_string_length(); - } - default: - return INCORRECT_TYPE; - } -} -inline simdjson_result element::get_string() const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::STRING: - return tape.get_string_view(); - default: - return INCORRECT_TYPE; - } -} -inline simdjson_result element::get_uint64() const noexcept { - if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken - if(tape.is_int64()) { - int64_t result = tape.next_tape_value(); - if (result < 0) { - return NUMBER_OUT_OF_RANGE; - } - return uint64_t(result); - } - return INCORRECT_TYPE; - } - return tape.next_tape_value(); -} -inline simdjson_result element::get_int64() const noexcept { - if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken - if(tape.is_uint64()) { - uint64_t result = tape.next_tape_value(); - // Wrapping max in parens to handle Windows issue: 
https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std - if (result > uint64_t((std::numeric_limits::max)())) { - return NUMBER_OUT_OF_RANGE; - } - return static_cast(result); - } - return INCORRECT_TYPE; - } - return tape.next_tape_value(); -} -inline simdjson_result element::get_double() const noexcept { - // Performance considerations: - // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight - // comparison. - // 2. Using a switch-case relies on the compiler guessing what kind of code generation - // we want... But the compiler cannot know that we expect the type to be "double" - // most of the time. - // We can expect get to refer to a double type almost all the time. - // It is important to craft the code accordingly so that the compiler can use this - // information. (This could also be solved with profile-guided optimization.) - if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken - if(tape.is_uint64()) { - return double(tape.next_tape_value()); - } else if(tape.is_int64()) { - return double(tape.next_tape_value()); - } - return INCORRECT_TYPE; - } - // this is common: - return tape.next_tape_value(); -} -inline simdjson_result element::get_array() const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::START_ARRAY: - return array(tape); - default: - return INCORRECT_TYPE; - } -} -inline simdjson_result element::get_object() const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::START_OBJECT: - return object(tape); - default: - return INCORRECT_TYPE; - } -} - -template -simdjson_warn_unused simdjson_really_inline error_code element::get(T &value) const noexcept { - return get().get(value); -} -// An element-specific version prevents recursion with simdjson_result::get(value) -template<> -simdjson_warn_unused simdjson_really_inline error_code element::get(element &value) const noexcept { - value = 
element(tape); - return SUCCESS; -} -template -inline void element::tie(T &value, error_code &error) && noexcept { - error = get(value); -} - -template -simdjson_really_inline bool element::is() const noexcept { - auto result = get(); - return !result.error(); -} - -template<> inline simdjson_result element::get() const noexcept { return get_array(); } -template<> inline simdjson_result element::get() const noexcept { return get_object(); } -template<> inline simdjson_result element::get() const noexcept { return get_c_str(); } -template<> inline simdjson_result element::get() const noexcept { return get_string(); } -template<> inline simdjson_result element::get() const noexcept { return get_int64(); } -template<> inline simdjson_result element::get() const noexcept { return get_uint64(); } -template<> inline simdjson_result element::get() const noexcept { return get_double(); } -template<> inline simdjson_result element::get() const noexcept { return get_bool(); } - -inline bool element::is_array() const noexcept { return is(); } -inline bool element::is_object() const noexcept { return is(); } -inline bool element::is_string() const noexcept { return is(); } -inline bool element::is_int64() const noexcept { return is(); } -inline bool element::is_uint64() const noexcept { return is(); } -inline bool element::is_double() const noexcept { return is(); } -inline bool element::is_bool() const noexcept { return is(); } -inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); } - -inline bool element::is_null() const noexcept { - return tape.is_null_on_tape(); -} - -#if SIMDJSON_EXCEPTIONS - -inline element::operator bool() const noexcept(false) { return get(); } -inline element::operator const char*() const noexcept(false) { return get(); } -inline element::operator std::string_view() const noexcept(false) { return get(); } -inline element::operator uint64_t() const noexcept(false) { return get(); } -inline element::operator 
int64_t() const noexcept(false) { return get(); } -inline element::operator double() const noexcept(false) { return get(); } -inline element::operator array() const noexcept(false) { return get(); } -inline element::operator object() const noexcept(false) { return get(); } - -inline array::iterator element::begin() const noexcept(false) { - return get().begin(); -} -inline array::iterator element::end() const noexcept(false) { - return get().end(); -} - -#endif // SIMDJSON_EXCEPTIONS - -inline simdjson_result element::operator[](std::string_view key) const noexcept { - return at_key(key); -} -inline simdjson_result element::operator[](const char *key) const noexcept { - return at_key(key); -} - -inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { - switch (tape.tape_ref_type()) { - case internal::tape_type::START_OBJECT: - return object(tape).at_pointer(json_pointer); - case internal::tape_type::START_ARRAY: - return array(tape).at_pointer(json_pointer); - default: { - if(!json_pointer.empty()) { // a non-empty string is invalid on an atom - return INVALID_JSON_POINTER; - } - // an empty string means that we return the current node - dom::element copy(*this); - return simdjson_result(std::move(copy)); - } - } -} -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] -inline simdjson_result element::at(std::string_view json_pointer) const noexcept { - // version 0.4 of simdjson allowed non-compliant pointers - auto std_pointer = (json_pointer.empty() ? 
"" : "/") + std::string(json_pointer.begin(), json_pointer.end()); - return at_pointer(std_pointer); -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API - -inline simdjson_result element::at(size_t index) const noexcept { - return get().at(index); -} -inline simdjson_result element::at_key(std::string_view key) const noexcept { - return get().at_key(key); -} -inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { - return get().at_key_case_insensitive(key); -} - -inline bool element::dump_raw_tape(std::ostream &out) const noexcept { - return tape.doc->dump_raw_tape(out); -} - - -inline std::ostream& operator<<(std::ostream& out, element_type type) { - switch (type) { - case element_type::ARRAY: - return out << "array"; - case element_type::OBJECT: - return out << "object"; - case element_type::INT64: - return out << "int64_t"; - case element_type::UINT64: - return out << "uint64_t"; - case element_type::DOUBLE: - return out << "double"; - case element_type::STRING: - return out << "string"; - case element_type::BOOL: - return out << "bool"; - case element_type::NULL_VALUE: - return out << "null"; - default: - return out << "unexpected content!!!"; // abort() usage is forbidden in the library - } -} - -} // namespace dom - -} // namespace simdjson - -#endif // SIMDJSON_INLINE_ELEMENT_H -/* end file include/simdjson/dom/element-inl.h */ - -#if defined(__cpp_lib_ranges) -static_assert(std::ranges::view); -static_assert(std::ranges::sized_range); -#if SIMDJSON_EXCEPTIONS -static_assert(std::ranges::view>); -static_assert(std::ranges::sized_range>); -#endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) - -#endif // SIMDJSON_INLINE_ARRAY_H -/* end file include/simdjson/dom/array-inl.h */ -/* begin file include/simdjson/dom/document_stream-inl.h */ -#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H -#define SIMDJSON_INLINE_DOCUMENT_STREAM_H - -#include -#include -#include -namespace simdjson { -namespace dom { - -#ifdef 
SIMDJSON_THREADS_ENABLED -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); -} - -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); -} - -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). - if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); -} - - -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. 
- can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } -} - -inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); -} -#endif - -simdjson_really_inline document_stream::document_stream( - dom::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, - error{SUCCESS} -#ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change -#endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif -} - -simdjson_really_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - error{UNINITIALIZED} -#ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) -#endif -{ -} - -simdjson_really_inline document_stream::~document_stream() noexcept { -#ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); -#endif -} - -simdjson_really_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { -} - -simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. 
- return iterator(this, error == EMPTY); -} - -simdjson_really_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); -} - -simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} - -simdjson_really_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { - // Note that in case of error, we do not yet mark - // the iterator as "finished": this detection is done - // in the operator++ function since it is possible - // to call operator++ repeatedly while omitting - // calls to operator*. - if (stream->error) { return stream->error; } - return stream->parser->doc.root(); -} - -simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. - // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. 
- return *this; -} - -simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; -} - -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->ensure_capacity(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } -#ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread if needed - error = stage1_thread_parser.ensure_capacity(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } - } -#endif // SIMDJSON_THREADS_ENABLED - next(); -} - -simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; -} - -simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { - const char* start = reinterpret_cast(stream->buf) + current_index(); - bool object_or_array = ((*start == '[') || (*start == '{')); - if(object_or_array) { - size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; - return std::string_view(start, next_doc_index - current_index() + 1); - } else { - size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; - return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); - } -} - - -inline void document_stream::next() noexcept { - // We always exit at once, once 
in an error condition. - if (error) { return; } - - // Load the next document from the batch - doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; - error = parser->implementation->stage2_next(parser->doc); - // If that was the last document in the batch, load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - -#ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } -#else - error = run_stage1(*parser, batch_start); -#endif - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - // Run stage 2 on the first document in the batch - doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; - error = parser->implementation->stage2_next(parser->doc); - } -} -inline size_t document_stream::size_in_bytes() const noexcept { - return len; -} - -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; -} - -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; -} - -inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } -} - -#ifdef SIMDJSON_THREADS_ENABLED - -inline 
void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(*parser, stage1_thread_parser); - error = stage1_thread_error; - if (error) { return; } - - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); - } -} - -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. - this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); - - worker->run(this, & this->stage1_thread_parser, _next_batch_start); -} - -#endif // SIMDJSON_THREADS_ENABLED - -} // namespace dom - -simdjson_really_inline simdjson_result::simdjson_result() noexcept - : simdjson_result_base() { -} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : simdjson_result_base(error) { -} -simdjson_really_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept - : simdjson_result_base(std::forward(value)) { -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); -} -simdjson_really_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); -} -#else // SIMDJSON_EXCEPTIONS -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -simdjson_really_inline dom::document_stream::iterator simdjson_result::begin() noexcept { - first.error = error(); - return first.begin(); -} -simdjson_really_inline dom::document_stream::iterator simdjson_result::end() 
noexcept { - first.error = error(); - return first.end(); -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API -#endif // SIMDJSON_EXCEPTIONS - -} // namespace simdjson -#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H -/* end file include/simdjson/dom/document_stream-inl.h */ -/* begin file include/simdjson/dom/document-inl.h */ -#ifndef SIMDJSON_INLINE_DOCUMENT_H -#define SIMDJSON_INLINE_DOCUMENT_H - -// Inline implementations go in here. - -#include -#include - -namespace simdjson { -namespace dom { - -// -// document inline implementation -// -inline element document::root() const noexcept { - return element(internal::tape_ref(this, 1)); -} -simdjson_warn_unused -inline size_t document::capacity() const noexcept { - return allocated_capacity; -} - -simdjson_warn_unused -inline error_code document::allocate(size_t capacity) noexcept { - if (capacity == 0) { - string_buf.reset(); - tape.reset(); - allocated_capacity = 0; - return SUCCESS; - } - - // a pathological input like "[[[[..." would generate capacity tape elements, so - // need a capacity of at least capacity + 1, but it is also possible to do - // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" - //where capacity + 1 tape elements are - // generated, see issue https://github.com/simdjson/simdjson/issues/345 - size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); - // a document with only zero-length strings... could have capacity/3 string - // and we would need capacity/3 * 5 bytes on the string buffer - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); - tape.reset(new (std::nothrow) uint64_t[tape_capacity]); - if(!(string_buf && tape)) { - allocated_capacity = 0; - string_buf.reset(); - tape.reset(); - return MEMALLOC; - } - // Technically the allocated_capacity might be larger than capacity - // so the next line is pessimistic. 
- allocated_capacity = capacity; - return SUCCESS; -} - -inline bool document::dump_raw_tape(std::ostream &os) const noexcept { - uint32_t string_length; - size_t tape_idx = 0; - uint64_t tape_val = tape[tape_idx]; - uint8_t type = uint8_t(tape_val >> 56); - os << tape_idx << " : " << type; - tape_idx++; - size_t how_many = 0; - if (type == 'r') { - how_many = size_t(tape_val & internal::JSON_VALUE_MASK); - } else { - // Error: no starting root node? - return false; - } - os << "\t// pointing to " << how_many << " (right after last node)\n"; - uint64_t payload; - for (; tape_idx < how_many; tape_idx++) { - os << tape_idx << " : "; - tape_val = tape[tape_idx]; - payload = tape_val & internal::JSON_VALUE_MASK; - type = uint8_t(tape_val >> 56); - switch (type) { - case '"': // we have a string - os << "string \""; - std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); - os << internal::escape_json_string(std::string_view( - reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), - string_length - )); - os << '"'; - os << '\n'; - break; - case 'l': // we have a long int - if (tape_idx + 1 >= how_many) { - return false; - } - os << "integer " << static_cast(tape[++tape_idx]) << "\n"; - break; - case 'u': // we have a long uint - if (tape_idx + 1 >= how_many) { - return false; - } - os << "unsigned integer " << tape[++tape_idx] << "\n"; - break; - case 'd': // we have a double - os << "float "; - if (tape_idx + 1 >= how_many) { - return false; - } - double answer; - std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); - os << answer << '\n'; - break; - case 'n': // we have a null - os << "null\n"; - break; - case 't': // we have a true - os << "true\n"; - break; - case 'f': // we have a false - os << "false\n"; - break; - case '{': // we have an object - os << "{\t// pointing to next tape location " << uint32_t(payload) - << " (first node after the scope), " - << " saturated count " - << ((payload >> 32) & internal::JSON_COUNT_MASK)<< 
"\n"; - break; case '}': // we end an object - os << "}\t// pointing to previous tape location " << uint32_t(payload) - << " (start of the scope)\n"; - break; - case '[': // we start an array - os << "[\t// pointing to next tape location " << uint32_t(payload) - << " (first node after the scope), " - << " saturated count " - << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; - break; - case ']': // we end an array - os << "]\t// pointing to previous tape location " << uint32_t(payload) - << " (start of the scope)\n"; - break; - case 'r': // we start and end with the root node - // should we be hitting the root node? - return false; - default: - return false; - } - } - tape_val = tape[tape_idx]; - payload = tape_val & internal::JSON_VALUE_MASK; - type = uint8_t(tape_val >> 56); - os << tape_idx << " : " << type << "\t// pointing to " << payload - << " (start root)\n"; - return true; -} - -} // namespace dom -} // namespace simdjson - -#endif // SIMDJSON_INLINE_DOCUMENT_H -/* end file include/simdjson/dom/document-inl.h */ -/* begin file include/simdjson/dom/object-inl.h */ -#ifndef SIMDJSON_INLINE_OBJECT_H -#define SIMDJSON_INLINE_OBJECT_H - -#include -#include - -namespace simdjson { - -// -// simdjson_result inline implementation -// -simdjson_really_inline simdjson_result::simdjson_result() noexcept - : internal::simdjson_result_base() {} -simdjson_really_inline simdjson_result::simdjson_result(dom::object value) noexcept - : internal::simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : internal::simdjson_result_base(error) {} - -inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { - if (error()) { return error(); } - return first[key]; -} -inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { - if (error()) { return error(); } - return first[key]; -} -inline simdjson_result 
simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} -inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { - if (error()) { return error(); } - return first.at_key(key); -} -inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { - if (error()) { return error(); } - return first.at_key_case_insensitive(key); -} - -#if SIMDJSON_EXCEPTIONS - -inline dom::object::iterator simdjson_result::begin() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.begin(); -} -inline dom::object::iterator simdjson_result::end() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.end(); -} -inline size_t simdjson_result::size() const noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first.size(); -} - -#endif // SIMDJSON_EXCEPTIONS - -namespace dom { - -// -// object inline implementation -// -simdjson_really_inline object::object() noexcept : tape{} {} -simdjson_really_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } -inline object::iterator object::begin() const noexcept { - return internal::tape_ref(tape.doc, tape.json_index + 1); -} -inline object::iterator object::end() const noexcept { - return internal::tape_ref(tape.doc, tape.after_element() - 1); -} -inline size_t object::size() const noexcept { - return tape.scope_count(); -} - -inline simdjson_result object::operator[](std::string_view key) const noexcept { - return at_key(key); -} -inline simdjson_result object::operator[](const char *key) const noexcept { - return at_key(key); -} -inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { - if(json_pointer.empty()) { // an empty string means that we return the current node - return element(this->tape); // copy the current node - } 
else if(json_pointer[0] != '/') { // otherwise there is an error - return INVALID_JSON_POINTER; - } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result child; - - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = at_key(unescaped); - } else { - child = at_key(key); - } - if(child.error()) { - return child; // we do not continue if there was an error - } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); - } - return child; -} - -inline simdjson_result object::at_key(std::string_view key) const noexcept { - iterator end_field = end(); - for (iterator field = begin(); field != end_field; ++field) { - if (field.key_equals(key)) { - return field.value(); - } - } - return NO_SUCH_FIELD; -} -// In case you wonder why we need this, please see -// https://github.com/simdjson/simdjson/issues/323 -// People do seek keys in a case-insensitive manner. 
-inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { - iterator end_field = end(); - for (iterator field = begin(); field != end_field; ++field) { - if (field.key_equals_case_insensitive(key)) { - return field.value(); - } - } - return NO_SUCH_FIELD; -} - -// -// object::iterator inline implementation -// -simdjson_really_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } -inline const key_value_pair object::iterator::operator*() const noexcept { - return key_value_pair(key(), value()); -} -inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { - return tape.json_index != other.tape.json_index; -} -inline bool object::iterator::operator==(const object::iterator& other) const noexcept { - return tape.json_index == other.tape.json_index; -} -inline bool object::iterator::operator<(const object::iterator& other) const noexcept { - return tape.json_index < other.tape.json_index; -} -inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { - return tape.json_index <= other.tape.json_index; -} -inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { - return tape.json_index >= other.tape.json_index; -} -inline bool object::iterator::operator>(const object::iterator& other) const noexcept { - return tape.json_index > other.tape.json_index; -} -inline object::iterator& object::iterator::operator++() noexcept { - tape.json_index++; - tape.json_index = tape.after_element(); - return *this; -} -inline object::iterator object::iterator::operator++(int) noexcept { - object::iterator out = *this; - ++*this; - return out; -} -inline std::string_view object::iterator::key() const noexcept { - return tape.get_string_view(); -} -inline uint32_t object::iterator::key_length() const noexcept { - return tape.get_string_length(); -} -inline const char* object::iterator::key_c_str() const noexcept { - 
return reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); -} -inline element object::iterator::value() const noexcept { - return element(internal::tape_ref(tape.doc, tape.json_index + 1)); -} - -/** - * Design notes: - * Instead of constructing a string_view and then comparing it with a - * user-provided strings, it is probably more performant to have dedicated - * functions taking as a parameter the string we want to compare against - * and return true when they are equal. That avoids the creation of a temporary - * std::string_view. Though it is possible for the compiler to avoid entirely - * any overhead due to string_view, relying too much on compiler magic is - * problematic: compiler magic sometimes fail, and then what do you do? - * Also, enticing users to rely on high-performance function is probably better - * on the long run. - */ - -inline bool object::iterator::key_equals(std::string_view o) const noexcept { - // We use the fact that the key length can be computed quickly - // without access to the string buffer. - const uint32_t len = key_length(); - if(o.size() == len) { - // We avoid construction of a temporary string_view instance. - return (memcmp(o.data(), key_c_str(), len) == 0); - } - return false; -} - -inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { - // We use the fact that the key length can be computed quickly - // without access to the string buffer. - const uint32_t len = key_length(); - if(o.size() == len) { - // See For case-insensitive string comparisons, avoid char-by-char functions - // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ - // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
- return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); - } - return false; -} -// -// key_value_pair inline implementation -// -inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : - key(_key), value(_value) {} - -} // namespace dom - -} // namespace simdjson - -#if defined(__cpp_lib_ranges) -static_assert(std::ranges::view); -static_assert(std::ranges::sized_range); -#if SIMDJSON_EXCEPTIONS -static_assert(std::ranges::view>); -static_assert(std::ranges::sized_range>); -#endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) - -#endif // SIMDJSON_INLINE_OBJECT_H -/* end file include/simdjson/dom/object-inl.h */ -/* begin file include/simdjson/dom/parsedjson_iterator-inl.h */ -#ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H -#define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H - -#include - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API - -namespace simdjson { - -// VS2017 reports deprecated warnings when you define a deprecated class's methods. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_DEPRECATED_WARNING - -// Because of template weirdness, the actual class definition is inline in the document class -simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { - return location < tape_length; -} - -// useful for debugging purposes -size_t dom::parser::Iterator::get_tape_location() const { - return location; -} - -// useful for debugging purposes -size_t dom::parser::Iterator::get_tape_length() const { - return tape_length; -} - -// returns the current depth (start at 1 with 0 reserved for the fictitious root -// node) -size_t dom::parser::Iterator::get_depth() const { - return depth; -} - -// A scope is a series of nodes at the same depth, typically it is either an -// object ({) or an array ([). The root node has type 'r'. 
-uint8_t dom::parser::Iterator::get_scope_type() const { - return depth_index[depth].scope_type; -} - -bool dom::parser::Iterator::move_forward() { - if (location + 1 >= tape_length) { - return false; // we are at the end! - } - - if ((current_type == '[') || (current_type == '{')) { - // We are entering a new scope - depth++; - assert(depth < max_depth); - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - } else if ((current_type == ']') || (current_type == '}')) { - // Leaving a scope. - depth--; - } else if (is_number()) { - // these types use 2 locations on the tape, not just one. - location += 1; - } - - location += 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} - -void dom::parser::Iterator::move_to_value() { - // assume that we are on a key, so move by 1. - location += 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); -} - -bool dom::parser::Iterator::move_to_key(const char *key) { - if (down()) { - do { - const bool right_key = (strcmp(get_string(), key) == 0); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} - -bool dom::parser::Iterator::move_to_key_insensitive( - const char *key) { - if (down()) { - do { - const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} - -bool dom::parser::Iterator::move_to_key(const char *key, - uint32_t length) { - if (down()) { - do { - bool right_key = ((get_string_length() == length) && - (memcmp(get_string(), key, length) == 0)); - move_to_value(); - if (right_key) { - return true; - } - } while (next()); - up(); - } - return false; -} - -bool dom::parser::Iterator::move_to_index(uint32_t index) { - if (down()) { - uint32_t i = 0; - for (; i < index; i++) { - if (!next()) { - break; - } - } - if (i == index) { - 
return true; - } - up(); - } - return false; -} - -bool dom::parser::Iterator::prev() { - size_t target_location = location; - to_start_scope(); - size_t npos = location; - if (target_location == npos) { - return false; // we were already at the start - } - size_t oldnpos; - // we have that npos < target_location here - do { - oldnpos = npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); - } - } while (npos < target_location); - location = oldnpos; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} - -bool dom::parser::Iterator::up() { - if (depth == 1) { - return false; // don't allow moving back to root - } - to_start_scope(); - // next we just move to the previous value - depth--; - location -= 1; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; -} - -bool dom::parser::Iterator::down() { - if (location + 1 >= tape_length) { - return false; - } - if ((current_type == '[') || (current_type == '{')) { - size_t npos = uint32_t(current_val); - if (npos == location + 2) { - return false; // we have an empty scope - } - depth++; - assert(depth < max_depth); - location = location + 1; - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - return true; - } - return false; -} - -void dom::parser::Iterator::to_start_scope() { - location = depth_index[depth].start_of_scope; - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); -} - -bool dom::parser::Iterator::next() { - size_t npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = location + (is_number() ? 
2 : 1); - } - uint64_t next_val = doc.tape[npos]; - uint8_t next_type = uint8_t(next_val >> 56); - if ((next_type == ']') || (next_type == '}')) { - return false; // we reached the end of the scope - } - location = npos; - current_val = next_val; - current_type = next_type; - return true; -} -dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) - : doc(pj.doc) -{ -#if SIMDJSON_EXCEPTIONS - if (!pj.valid) { throw simdjson_error(pj.error); } -#else - if (!pj.valid) { return; } // abort() usage is forbidden in the library -#endif - - max_depth = pj.max_depth(); - depth_index = new scopeindex_t[max_depth + 1]; - depth_index[0].start_of_scope = location; - current_val = doc.tape[location++]; - current_type = uint8_t(current_val >> 56); - depth_index[0].scope_type = current_type; - tape_length = size_t(current_val & internal::JSON_VALUE_MASK); - if (location < tape_length) { - // If we make it here, then depth_capacity must >=2, but the compiler - // may not know this. - current_val = doc.tape[location]; - current_type = uint8_t(current_val >> 56); - depth++; - assert(depth < max_depth); - depth_index[depth].start_of_scope = location; - depth_index[depth].scope_type = current_type; - } -} -dom::parser::Iterator::Iterator( - const dom::parser::Iterator &o) noexcept - : doc(o.doc), - max_depth(o.depth), - depth(o.depth), - location(o.location), - tape_length(o.tape_length), - current_type(o.current_type), - current_val(o.current_val) -{ - depth_index = new scopeindex_t[max_depth+1]; - std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); -} - -dom::parser::Iterator::~Iterator() noexcept { - if (depth_index) { delete[] depth_index; } -} - -bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { - if (!is_ok()) { - return false; - } - switch (current_type) { - case '"': // we have a string - os << '"'; - if (escape_strings) { - os << internal::escape_json_string(std::string_view(get_string(), 
get_string_length())); - } else { - // was: os << get_string();, but given that we can include null chars, we - // have to do something crazier: - std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); - } - os << '"'; - break; - case 'l': // we have a long int - os << get_integer(); - break; - case 'u': - os << get_unsigned_integer(); - break; - case 'd': - os << get_double(); - break; - case 'n': // we have a null - os << "null"; - break; - case 't': // we have a true - os << "true"; - break; - case 'f': // we have a false - os << "false"; - break; - case '{': // we have an object - case '}': // we end an object - case '[': // we start an array - case ']': // we end an array - os << char(current_type); - break; - default: - return false; - } - return true; -} - -bool dom::parser::Iterator::move_to(const char *pointer, - uint32_t length) { - char *new_pointer = nullptr; - if (pointer[0] == '#') { - // Converting fragment representation to string representation - new_pointer = new char[length]; - uint32_t new_length = 0; - for (uint32_t i = 1; i < length; i++) { - if (pointer[i] == '%' && pointer[i + 1] == 'x') { -#if __cpp_exceptions - try { -#endif - int fragment = - std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); - if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { - // escaping the character - new_pointer[new_length] = '\\'; - new_length++; - } - new_pointer[new_length] = char(fragment); - i += 3; -#if __cpp_exceptions - } catch (std::invalid_argument &) { - delete[] new_pointer; - return false; // the fragment is invalid - } -#endif - } else { - new_pointer[new_length] = pointer[i]; - } - new_length++; - } - length = new_length; - pointer = new_pointer; - } - - // saving the current state - size_t depth_s = depth; - size_t location_s = location; - uint8_t current_type_s = current_type; - uint64_t current_val_s = current_val; - - rewind(); // The json pointer is used from the root of the document. 
- - bool found = relative_move_to(pointer, length); - delete[] new_pointer; - - if (!found) { - // since the pointer has found nothing, we get back to the original - // position. - depth = depth_s; - location = location_s; - current_type = current_type_s; - current_val = current_val_s; - } - - return found; -} - -bool dom::parser::Iterator::relative_move_to(const char *pointer, - uint32_t length) { - if (length == 0) { - // returns the whole document - return true; - } - - if (pointer[0] != '/') { - // '/' must be the first character - return false; - } - - // finding the key in an object or the index in an array - std::string key_or_index; - uint32_t offset = 1; - - // checking for the "-" case - if (is_array() && pointer[1] == '-') { - if (length != 2) { - // the pointer must be exactly "/-" - // there can't be anything more after '-' as an index - return false; - } - key_or_index = '-'; - offset = length; // will skip the loop coming right after - } - - // We either transform the first reference token to a valid json key - // or we make sure it is a valid index in an array. - for (; offset < length; offset++) { - if (pointer[offset] == '/') { - // beginning of the next key or index - break; - } - if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { - // the index of an array must be an integer - // we also make sure std::stoi won't discard whitespaces later - return false; - } - if (pointer[offset] == '~') { - // "~1" represents "/" - if (pointer[offset + 1] == '1') { - key_or_index += '/'; - offset++; - continue; - } - // "~0" represents "~" - if (pointer[offset + 1] == '0') { - key_or_index += '~'; - offset++; - continue; - } - } - if (pointer[offset] == '\\') { - if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || - (pointer[offset + 1] <= 0x1F)) { - key_or_index += pointer[offset + 1]; - offset++; - continue; - } - return false; // invalid escaped character - } - if (pointer[offset] == '\"') { - // unescaped quote character. 
this is an invalid case. - // lets do nothing and assume most pointers will be valid. - // it won't find any corresponding json key anyway. - // return false; - } - key_or_index += pointer[offset]; - } - - bool found = false; - if (is_object()) { - if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { - found = relative_move_to(pointer + offset, length - offset); - } - } else if (is_array()) { - if (key_or_index == "-") { // handling "-" case first - if (down()) { - while (next()) - ; // moving to the end of the array - // moving to the nonexistent value right after... - size_t npos; - if ((current_type == '[') || (current_type == '{')) { - // we need to jump - npos = uint32_t(current_val); - } else { - npos = - location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); - } - location = npos; - current_val = doc.tape[npos]; - current_type = uint8_t(current_val >> 56); - return true; // how could it fail ? - } - } else { // regular numeric index - // The index can't have a leading '0' - if (key_or_index[0] == '0' && key_or_index.length() > 1) { - return false; - } - // it cannot be empty - if (key_or_index.length() == 0) { - return false; - } - // we already checked the index contains only valid digits - uint32_t index = std::stoi(key_or_index); - if (move_to_index(index)) { - found = relative_move_to(pointer + offset, length - offset); - } - } - } - - return found; -} - -SIMDJSON_POP_DISABLE_WARNINGS -} // namespace simdjson - -#endif // SIMDJSON_DISABLE_DEPRECATED_API - - -#endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H -/* end file include/simdjson/dom/parsedjson_iterator-inl.h */ -/* begin file include/simdjson/dom/parser-inl.h */ -#ifndef SIMDJSON_INLINE_PARSER_H -#define SIMDJSON_INLINE_PARSER_H - -#include -#include - -namespace simdjson { -namespace dom { - -// -// parser inline implementation -// -simdjson_really_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity}, - loaded_bytes(nullptr) { -} 
-simdjson_really_inline parser::parser(parser &&other) noexcept = default; -simdjson_really_inline parser &parser::operator=(parser &&other) noexcept = default; - -inline bool parser::is_valid() const noexcept { return valid; } -inline int parser::get_error_code() const noexcept { return error; } -inline std::string parser::get_error_message() const noexcept { return error_message(error); } - -inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { - return valid ? doc.dump_raw_tape(os) : false; -} - -inline simdjson_result parser::read_file(const std::string &path) noexcept { - // Open the file - SIMDJSON_PUSH_DISABLE_WARNINGS - SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(path.c_str(), "rb"); - SIMDJSON_POP_DISABLE_WARNINGS - - if (fp == nullptr) { - return IO_ERROR; - } - - // Get the file size - if(std::fseek(fp, 0, SEEK_END) < 0) { - std::fclose(fp); - return IO_ERROR; - } -#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS - __int64 len = _ftelli64(fp); - if(len == -1L) { - std::fclose(fp); - return IO_ERROR; - } -#else - long len = std::ftell(fp); - if((len < 0) || (len == LONG_MAX)) { - std::fclose(fp); - return IO_ERROR; - } -#endif - - // Make sure we have enough capacity to load the file - if (_loaded_bytes_capacity < size_t(len)) { - loaded_bytes.reset( internal::allocate_padded_buffer(len) ); - if (!loaded_bytes) { - std::fclose(fp); - return MEMALLOC; - } - _loaded_bytes_capacity = len; - } - - // Read the string - std::rewind(fp); - size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); - if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { - return IO_ERROR; - } - - return bytes_read; -} - -inline simdjson_result parser::load(const std::string &path) & noexcept { - size_t len; - auto _error = read_file(path).get(len); - if (_error) { return _error; } - return parse(loaded_bytes.get(), len, false); -} - -inline simdjson_result 
parser::load_many(const std::string &path, size_t batch_size) noexcept { - size_t len; - auto _error = read_file(path).get(len); - if (_error) { return _error; } - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); -} - -inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { - // Important: we need to ensure that document has enough capacity. - // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! - error_code _error = ensure_capacity(provided_doc, len); - if (_error) { return _error; } - if (realloc_if_needed) { - // Make sure we have enough capacity to copy len bytes - if (!loaded_bytes || _loaded_bytes_capacity < len) { - loaded_bytes.reset( internal::allocate_padded_buffer(len) ); - if (!loaded_bytes) { - return MEMALLOC; - } - _loaded_bytes_capacity = len; - } - std::memcpy(static_cast(loaded_bytes.get()), buf, len); - } - _error = implementation->parse(realloc_if_needed ? 
reinterpret_cast(loaded_bytes.get()): buf, len, provided_doc); - - if (_error) { return _error; } - - return provided_doc.root(); -} - -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); -} -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { - return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); -} -simdjson_really_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { - return parse_into_document(provided_doc, s.data(), s.length(), false); -} - - -inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse_into_document(doc, buf, len, realloc_if_needed); -} - -simdjson_really_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { - return parse(reinterpret_cast(buf), len, realloc_if_needed); -} -simdjson_really_inline simdjson_result parser::parse(const std::string &s) & noexcept { - return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); -} -simdjson_really_inline simdjson_result parser::parse(const padded_string &s) & noexcept { - return parse(s.data(), s.length(), false); -} - -inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - return document_stream(*this, buf, len, batch_size); -} -inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { - return parse_many(reinterpret_cast(buf), len, batch_size); -} -inline simdjson_result parser::parse_many(const std::string &s, size_t 
batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); -} -inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { - return parse_many(s.data(), s.length(), batch_size); -} - -simdjson_really_inline size_t parser::capacity() const noexcept { - return implementation ? implementation->capacity() : 0; -} -simdjson_really_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; -} -simdjson_really_inline size_t parser::max_depth() const noexcept { - return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; -} - -simdjson_warn_unused -inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { - // - // Reallocate implementation if needed - // - error_code err; - if (implementation) { - err = implementation->allocate(capacity, max_depth); - } else { - err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); - } - if (err) { return err; } - return SUCCESS; -} - -#ifndef SIMDJSON_DISABLE_DEPRECATED_API -simdjson_warn_unused -inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { - return !allocate(capacity, max_depth); -} -#endif // SIMDJSON_DISABLE_DEPRECATED_API - -inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { - return ensure_capacity(doc, desired_capacity); -} - - -inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { - // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. - // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. - if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } - // If we don't have enough capacity, (try to) automatically bump it. - // If the document needs allocation, do it too. 
- // Both in one if statement to minimize unlikely branching. - // - // Note: we must make sure that this function is called if capacity() == 0. We do so because we - // ensure that desired_capacity > 0. - if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { - if (desired_capacity > max_capacity()) { - return error = CAPACITY; - } - error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; - error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; - if(err1 != SUCCESS) { return error = err1; } - if(err2 != SUCCESS) { return error = err2; } - } - return SUCCESS; -} - -simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = MINIMAL_DOCUMENT_CAPACITY; - } -} - -} // namespace dom -} // namespace simdjson - -#endif // SIMDJSON_INLINE_PARSER_H -/* end file include/simdjson/dom/parser-inl.h */ -/* begin file include/simdjson/internal/tape_ref-inl.h */ -#ifndef SIMDJSON_INLINE_TAPE_REF_H -#define SIMDJSON_INLINE_TAPE_REF_H - -#include - -namespace simdjson { -namespace internal { - -// -// tape_ref inline implementation -// -simdjson_really_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} -simdjson_really_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} - - -simdjson_really_inline bool tape_ref::is_document_root() const noexcept { - return json_index == 1; // should we ever change the structure of the tape, this should get updated. -} - -// Some value types have a specific on-tape word value. It can be faster -// to check the type by doing a word-to-word comparison instead of extracting the -// most significant 8 bits. 
- -simdjson_really_inline bool tape_ref::is_double() const noexcept { - constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; - return doc->tape[json_index] == tape_double; -} -simdjson_really_inline bool tape_ref::is_int64() const noexcept { - constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; - return doc->tape[json_index] == tape_int64; -} -simdjson_really_inline bool tape_ref::is_uint64() const noexcept { - constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; - return doc->tape[json_index] == tape_uint64; -} -simdjson_really_inline bool tape_ref::is_false() const noexcept { - constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; - return doc->tape[json_index] == tape_false; -} -simdjson_really_inline bool tape_ref::is_true() const noexcept { - constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; - return doc->tape[json_index] == tape_true; -} -simdjson_really_inline bool tape_ref::is_null_on_tape() const noexcept { - constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; - return doc->tape[json_index] == tape_null; -} - -inline size_t tape_ref::after_element() const noexcept { - switch (tape_ref_type()) { - case tape_type::START_ARRAY: - case tape_type::START_OBJECT: - return matching_brace_index(); - case tape_type::UINT64: - case tape_type::INT64: - case tape_type::DOUBLE: - return json_index + 2; - default: - return json_index + 1; - } -} -simdjson_really_inline tape_type tape_ref::tape_ref_type() const noexcept { - return static_cast(doc->tape[json_index] >> 56); -} -simdjson_really_inline uint64_t internal::tape_ref::tape_value() const noexcept { - return doc->tape[json_index] & internal::JSON_VALUE_MASK; -} -simdjson_really_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { - return uint32_t(doc->tape[json_index]); -} -simdjson_really_inline uint32_t internal::tape_ref::scope_count() const noexcept { - return uint32_t((doc->tape[json_index] >> 32) & 
internal::JSON_COUNT_MASK); -} - -template -simdjson_really_inline T tape_ref::next_tape_value() const noexcept { - static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); - // Though the following is tempting... - // return *reinterpret_cast(&doc->tape[json_index + 1]); - // It is not generally safe. It is safer, and often faster to rely - // on memcpy. Yes, it is uglier, but it is also encapsulated. - T x; - std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); - return x; -} - -simdjson_really_inline uint32_t internal::tape_ref::get_string_length() const noexcept { - size_t string_buf_index = size_t(tape_value()); - uint32_t len; - std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); - return len; -} - -simdjson_really_inline const char * internal::tape_ref::get_c_str() const noexcept { - size_t string_buf_index = size_t(tape_value()); - return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); -} - -inline std::string_view internal::tape_ref::get_string_view() const noexcept { - return std::string_view( - get_c_str(), - get_string_length() - ); -} - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INLINE_TAPE_REF_H -/* end file include/simdjson/internal/tape_ref-inl.h */ -/* begin file include/simdjson/dom/serialization-inl.h */ - -#ifndef SIMDJSON_SERIALIZATION_INL_H -#define SIMDJSON_SERIALIZATION_INL_H - - -#include -#include - -namespace simdjson { -namespace dom { -inline bool parser::print_json(std::ostream &os) const noexcept { - if (!valid) { return false; } - simdjson::internal::string_builder<> sb; - sb.append(doc.root()); - std::string_view answer = sb.str(); - os << answer; - return true; -} -} -/*** - * Number utility functions - **/ - - -namespace { -/**@private - * Escape sequence like \b or \u0001 - * We expect that most compilers will use 8 bytes for this data structure. 
- **/ -struct escape_sequence { - uint8_t length; - const char string[7]; // technically, we only ever need 6 characters, we pad to 8 -}; -/**@private - * This converts a signed integer into a character sequence. - * The caller is responsible for providing enough memory (at least - * 20 characters.) - * Though various runtime libraries provide itoa functions, - * it is not part of the C++ standard. The C++17 standard - * adds the to_chars functions which would do as well, but - * we want to support C++11. - */ -char *fast_itoa(char *output, int64_t value) noexcept { - // This is a standard implementation of itoa. - char buffer[20]; - uint64_t value_positive; - // In general, negating a signed integer is unsafe. - if(value < 0) { - *output++ = '-'; - // Doing value_positive = -value; while avoiding - // undefined behavior warnings. - // It assumes two complement's which is universal at this - // point in time. - std::memcpy(&value_positive, &value, sizeof(value)); - value_positive = (~value_positive) + 1; // this is a negation - } else { - value_positive = value; - } - // We work solely with value_positive. It *might* be easier - // for an optimizing compiler to deal with an unsigned variable - // as far as performance goes. - const char *const end_buffer = buffer + 20; - char *write_pointer = buffer + 19; - // A faster approach is possible if we expect large integers: - // unroll the loop (work in 100s, 1000s) and use some kind of - // memoization. - while(value_positive >= 10) { - *write_pointer-- = char('0' + (value_positive % 10)); - value_positive /= 10; - } - *write_pointer = char('0' + value_positive); - size_t len = end_buffer - write_pointer; - std::memcpy(output, write_pointer, len); - return output + len; -} -/**@private - * This converts an unsigned integer into a character sequence. - * The caller is responsible for providing enough memory (at least - * 19 characters.) 
- * Though various runtime libraries provide itoa functions, - * it is not part of the C++ standard. The C++17 standard - * adds the to_chars functions which would do as well, but - * we want to support C++11. - */ -char *fast_itoa(char *output, uint64_t value) noexcept { - // This is a standard implementation of itoa. - char buffer[20]; - const char *const end_buffer = buffer + 20; - char *write_pointer = buffer + 19; - // A faster approach is possible if we expect large integers: - // unroll the loop (work in 100s, 1000s) and use some kind of - // memoization. - while(value >= 10) { - *write_pointer-- = char('0' + (value % 10)); - value /= 10; - }; - *write_pointer = char('0' + value); - size_t len = end_buffer - write_pointer; - std::memcpy(output, write_pointer, len); - return output + len; -} -} // anonymous namespace -namespace internal { - -/*** - * Minifier/formatter code. - **/ - -simdjson_really_inline void mini_formatter::number(uint64_t x) { - char number_buffer[24]; - char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); -} - -simdjson_really_inline void mini_formatter::number(int64_t x) { - char number_buffer[24]; - char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); -} - -simdjson_really_inline void mini_formatter::number(double x) { - char number_buffer[24]; - // Currently, passing the nullptr to the second argument is - // safe because our implementation does not check the second - // argument. 
- char *newp = internal::to_chars(number_buffer, nullptr, x); - buffer.insert(buffer.end(), number_buffer, newp); -} - -simdjson_really_inline void mini_formatter::start_array() { one_char('['); } -simdjson_really_inline void mini_formatter::end_array() { one_char(']'); } -simdjson_really_inline void mini_formatter::start_object() { one_char('{'); } -simdjson_really_inline void mini_formatter::end_object() { one_char('}'); } -simdjson_really_inline void mini_formatter::comma() { one_char(','); } - - -simdjson_really_inline void mini_formatter::true_atom() { - const char * s = "true"; - buffer.insert(buffer.end(), s, s + 4); -} -simdjson_really_inline void mini_formatter::false_atom() { - const char * s = "false"; - buffer.insert(buffer.end(), s, s + 5); -} -simdjson_really_inline void mini_formatter::null_atom() { - const char * s = "null"; - buffer.insert(buffer.end(), s, s + 4); -} -simdjson_really_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } -simdjson_really_inline void mini_formatter::key(std::string_view unescaped) { - string(unescaped); - one_char(':'); -} -simdjson_really_inline void mini_formatter::string(std::string_view unescaped) { - one_char('\"'); - size_t i = 0; - // Fast path for the case where we have no control character, no ", and no backslash. - // This should include most keys. - // - // We would like to use 'bool' but some compilers take offense to bitwise operation - // with bool types. 
- constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for(;i + 8 <= unescaped.length(); i += 8) { - // Poor's man vectorization. This could get much faster if we used SIMD. - // - // It is not the case that replacing '|' with '||' would be neutral performance-wise. - if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] - | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] - | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] - | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] - ) { break; } - } - for(;i < unescaped.length(); i++) { - if(needs_escaping[uint8_t(unescaped[i])]) { break; } - } - // The following is also possible and omits a 256-byte table, but it is slower: - // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) - // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} - - // At least for long strings, the following should be fast. We could - // do better by integrating the checks and the insertion. - buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); - // We caught a control character if we enter this loop (slow). 
- // Note that we are do not restart from the beginning, but rather we continue - // from the point where we encountered something that requires escaping. - for (; i < unescaped.length(); i++) { - switch (unescaped[i]) { - case '\"': - { - const char * s = "\\\""; - buffer.insert(buffer.end(), s, s + 2); - } - break; - case '\\': - { - const char * s = "\\\\"; - buffer.insert(buffer.end(), s, s + 2); - } - break; - default: - if (uint8_t(unescaped[i]) <= 0x1F) { - // If packed, this uses 8 * 32 bytes. - // Note that we expect most compilers to embed this code in the data - // section. - constexpr static escape_sequence escaped[32] = { - {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, - {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, - {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, - {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, - {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, - {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, - {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, - {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; - auto u = escaped[uint8_t(unescaped[i])]; - buffer.insert(buffer.end(), u.string, u.string + u.length); - } else { - one_char(unescaped[i]); - } - } // switch - } // for - one_char('\"'); -} - -inline void mini_formatter::clear() { - buffer.clear(); -} - -simdjson_really_inline std::string_view mini_formatter::str() const { - return std::string_view(buffer.data(), buffer.size()); -} - - -/*** - * String building code. 
- **/ - -template -inline void string_builder::append(simdjson::dom::element value) { - // using tape_type = simdjson::internal::tape_type; - size_t depth = 0; - constexpr size_t MAX_DEPTH = 16; - bool is_object[MAX_DEPTH]; - is_object[0] = false; - bool after_value = false; - - internal::tape_ref iter(value.tape); - do { - // print commas after each value - if (after_value) { - format.comma(); - } - // If we are in an object, print the next key and :, and skip to the next - // value. - if (is_object[depth]) { - format.key(iter.get_string_view()); - iter.json_index++; - } - switch (iter.tape_ref_type()) { - - // Arrays - case tape_type::START_ARRAY: { - // If we're too deep, we need to recurse to go deeper. - depth++; - if (simdjson_unlikely(depth >= MAX_DEPTH)) { - append(simdjson::dom::array(iter)); - iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] - depth--; - break; - } - - // Output start [ - format.start_array(); - iter.json_index++; - - // Handle empty [] (we don't want to come back around and print commas) - if (iter.tape_ref_type() == tape_type::END_ARRAY) { - format.end_array(); - depth--; - break; - } - - is_object[depth] = false; - after_value = false; - continue; - } - - // Objects - case tape_type::START_OBJECT: { - // If we're too deep, we need to recurse to go deeper. 
- depth++; - if (simdjson_unlikely(depth >= MAX_DEPTH)) { - append(simdjson::dom::object(iter)); - iter.json_index = iter.matching_brace_index() - 1; // Jump to the } - depth--; - break; - } - - // Output start { - format.start_object(); - iter.json_index++; - - // Handle empty {} (we don't want to come back around and print commas) - if (iter.tape_ref_type() == tape_type::END_OBJECT) { - format.end_object(); - depth--; - break; - } - - is_object[depth] = true; - after_value = false; - continue; - } - - // Scalars - case tape_type::STRING: - format.string(iter.get_string_view()); - break; - case tape_type::INT64: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::UINT64: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::DOUBLE: - format.number(iter.next_tape_value()); - iter.json_index++; // numbers take up 2 spots, so we need to increment - // extra - break; - case tape_type::TRUE_VALUE: - format.true_atom(); - break; - case tape_type::FALSE_VALUE: - format.false_atom(); - break; - case tape_type::NULL_VALUE: - format.null_atom(); - break; - - // These are impossible - case tape_type::END_ARRAY: - case tape_type::END_OBJECT: - case tape_type::ROOT: - SIMDJSON_UNREACHABLE(); - } - iter.json_index++; - after_value = true; - - // Handle multiple ends in a row - while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || - iter.tape_ref_type() == tape_type::END_OBJECT)) { - if (iter.tape_ref_type() == tape_type::END_ARRAY) { - format.end_array(); - } else { - format.end_object(); - } - depth--; - iter.json_index++; - } - - // Stop when we're at depth 0 - } while (depth != 0); -} - -template -inline void string_builder::append(simdjson::dom::object value) { - format.start_object(); - auto pair = value.begin(); - auto end = value.end(); - if (pair != end) { - 
append(*pair); - for (++pair; pair != end; ++pair) { - format.comma(); - append(*pair); - } - } - format.end_object(); -} - -template -inline void string_builder::append(simdjson::dom::array value) { - format.start_array(); - auto iter = value.begin(); - auto end = value.end(); - if (iter != end) { - append(*iter); - for (++iter; iter != end; ++iter) { - format.comma(); - append(*iter); - } - } - format.end_array(); -} - -template -simdjson_really_inline void string_builder::append(simdjson::dom::key_value_pair kv) { - format.key(kv.key); - append(kv.value); -} - -template -simdjson_really_inline void string_builder::clear() { - format.clear(); -} - -template -simdjson_really_inline std::string_view string_builder::str() const { - return format.str(); -} - - -} // namespace internal -} // namespace simdjson - -#endif -/* end file include/simdjson/dom/serialization-inl.h */ - -SIMDJSON_POP_DISABLE_WARNINGS - -#endif // SIMDJSON_DOM_H -/* end file include/simdjson/dom.h */ -/* begin file include/simdjson/builtin.h */ -#ifndef SIMDJSON_BUILTIN_H -#define SIMDJSON_BUILTIN_H - -/* begin file include/simdjson/implementations.h */ -#ifndef SIMDJSON_IMPLEMENTATIONS_H -#define SIMDJSON_IMPLEMENTATIONS_H - -/* begin file include/simdjson/implementation-base.h */ -#ifndef SIMDJSON_IMPLEMENTATION_BASE_H -#define SIMDJSON_IMPLEMENTATION_BASE_H - -/** - * @file - * - * Includes common stuff needed for implementations. - */ - - -// Implementation-internal files (must be included before the implementations themselves, to keep -// amalgamation working--otherwise, the first time a file is included, it might be put inside the -// #ifdef SIMDJSON_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't -// compile unless that implementation is turned on). 
-/* begin file include/simdjson/internal/jsoncharutils_tables.h */ -#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H - - -#ifdef JSON_TEST_STRINGS -void found_string(const uint8_t *buf, const uint8_t *parsed_begin, - const uint8_t *parsed_end); -void found_bad_string(const uint8_t *buf); -#endif - -namespace simdjson { -namespace internal { -// structural chars here are -// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) -// we are also interested in the four whitespace characters -// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d - -extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; -extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; -extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H -/* end file include/simdjson/internal/jsoncharutils_tables.h */ -/* begin file include/simdjson/internal/numberparsing_tables.h */ -#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H -#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H - - -namespace simdjson { -namespace internal { -/** - * The smallest non-zero float (binary64) is 2^-1074. - * We take as input numbers of the form w x 10^q where w < 2^64. - * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. - * However, we have that - * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. - * Thus it is possible for a number of the form w * 10^-342 where - * w is a 64-bit value to be a non-zero floating-point number. - ********* - * Any number of form w * 10^309 where w>= 1 is going to be - * infinite in binary64 so we never need to worry about powers - * of 5 greater than 308. - */ -constexpr int smallest_power = -342; -constexpr int largest_power = 308; - -/** - * Represents a 128-bit value. - * low: least significant 64 bits. 
- * high: most significant 64 bits. - */ -struct value128 { - uint64_t low; - uint64_t high; -}; - - -// Precomputed powers of ten from 10^0 to 10^22. These -// can be represented exactly using the double type. -extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; - - -/** - * When mapping numbers from decimal to binary, - * we go from w * 10^q to m * 2^p but we have - * 10^q = 5^q * 2^q, so effectively - * we are trying to match - * w * 2^q * 5^q to m * 2^p. Thus the powers of two - * are not a concern since they can be represented - * exactly using the binary notation, only the powers of five - * affect the binary significand. - */ - - -// The truncated powers of five from 5^-342 all the way to 5^308 -// The mantissa is truncated to 128 bits, and -// never rounded up. Uses about 10KB. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H -/* end file include/simdjson/internal/numberparsing_tables.h */ -/* begin file include/simdjson/internal/simdprune_tables.h */ -#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H -#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H - -#include - -namespace simdjson { // table modified and copied from -namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable - -extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; - -extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; - -// 256 * 8 bytes = 2kB, easily fits in cache. -extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; - -} // namespace internal -} // namespace simdjson - -#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H -/* end file include/simdjson/internal/simdprune_tables.h */ - -#endif // SIMDJSON_IMPLEMENTATION_BASE_H -/* end file include/simdjson/implementation-base.h */ - -// -// First, figure out which implementations can be run. 
Doing it here makes it so we don't have to worry about the order -// in which we include them. -// - -#ifndef SIMDJSON_IMPLEMENTATION_ARM64 -#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 - -#ifdef __has_include -// How do we detect that a compiler supports vbmi2? -// For sure if the following header is found, we are ok? -#if __has_include() -#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 -#endif -#endif - -#ifdef _MSC_VER -#if _MSC_VER >= 1920 -// Visual Studio 2019 and up support VBMI2 under x64 even if the header -// avx512vbmi2intrin.h is not found. -#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 -#endif -#endif - -// By default, we allow AVX512. -#ifndef SIMDJSON_AVX512_ALLOWED -#define SIMDJSON_AVX512_ALLOWED 1 -#endif - -// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected -// at runtime. -#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE -#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) -#endif - -#ifdef _MSC_VER -// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) -#else -#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) -#endif - -// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected -// at runtime. 
-#ifndef SIMDJSON_IMPLEMENTATION_HASWELL -#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 -#endif -#ifdef _MSC_VER -// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see -// https://github.com/simdjson/simdjson/issues/1247 -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) -#else -#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) -#endif - -// Default Westmere to on if this is x86-64. Note that the macro SIMDJSON_REQUIRES_HASWELL appears unused. -#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE -#define SIMDJSON_IMPLEMENTATION_WESTMERE (SIMDJSON_IS_X86_64 && !SIMDJSON_REQUIRES_HASWELL) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) - -#ifndef SIMDJSON_IMPLEMENTATION_PPC64 -#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 - -// Default Fallback to on unless a builtin implementation has already been selected. -#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK -#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE && !SIMDJSON_CAN_ALWAYS_RUN_PPC64) -#endif -#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK - -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_UNDESIRED_WARNINGS - -// Implementations -/* begin file include/simdjson/arm64.h */ -#ifndef SIMDJSON_ARM64_H -#define SIMDJSON_ARM64_H - - -#if SIMDJSON_IMPLEMENTATION_ARM64 - -namespace simdjson { -/** - * Implementation for NEON (ARMv8). 
- */ -namespace arm64 { -} // namespace arm64 -} // namespace simdjson - -/* begin file include/simdjson/arm64/implementation.h */ -#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H -#define SIMDJSON_ARM64_IMPLEMENTATION_H - - -namespace simdjson { -namespace arm64 { - -namespace { -using namespace simdjson; -using namespace simdjson::dom; -} - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace arm64 -} // namespace simdjson - -#endif // SIMDJSON_ARM64_IMPLEMENTATION_H -/* end file include/simdjson/arm64/implementation.h */ - -/* begin file include/simdjson/arm64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "arm64" -// #define SIMDJSON_IMPLEMENTATION arm64 -/* end file include/simdjson/arm64/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace arm64 { - -// expectation: sizeof(open_container) = 64/8. 
-struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace arm64 -} // namespace simdjson - -namespace simdjson { -namespace arm64 { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/arm64/intrinsics.h */ -#ifndef SIMDJSON_ARM64_INTRINSICS_H -#define SIMDJSON_ARM64_INTRINSICS_H - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -#endif // SIMDJSON_ARM64_INTRINSICS_H -/* end file include/simdjson/arm64/intrinsics.h */ -/* begin file include/simdjson/arm64/bitmanipulation.h */ -#ifndef SIMDJSON_ARM64_BITMANIPULATION_H -#define SIMDJSON_ARM64_BITMANIPULATION_H - -namespace simdjson { -namespace arm64 { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. 
-SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int count_ones(uint64_t input_num) { - return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); -} - - -#if defined(__GNUC__) // catches clang and gcc -/** - * ARM has a fast 64-bit "bit reversal function" that is handy. However, - * it is not generally available as an intrinsic function under Visual - * Studio (though this might be changing). Even under clang/gcc, we - * apparently need to invoke inline assembly. - */ -/* - * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that - * work well with bit reversal may use it. 
- */ -#define SIMDJSON_PREFER_REVERSE_BITS 1 - -/* reverse the bits */ -simdjson_really_inline uint64_t reverse_bits(uint64_t input_num) { - uint64_t rev_bits; - __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); - return rev_bits; -} - -/** - * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, - * then this will set to zero the leading bit. It is possible for leading_zeroes to be - * greating or equal to 63 in which case we trigger undefined behavior, but the output - * of such undefined behavior is never used. - **/ -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { - return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); -} - -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -#endif // SIMDJSON_ARM64_BITMANIPULATION_H -/* end file include/simdjson/arm64/bitmanipulation.h */ -/* begin file include/simdjson/arm64/bitmask.h */ -#ifndef SIMDJSON_ARM64_BITMASK_H -#define SIMDJSON_ARM64_BITMASK_H - -namespace simdjson { -namespace arm64 { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { - ///////////// - // We could do this with PMULL, but it is apparently slow. 
- // - //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension - //return vmull_p64(-1ULL, bitmask); - //#else - // Analysis by @sebpop: - // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out - // in between other vector code, so effectively the extra cycles of the sequence do not matter - // because the GPR units are idle otherwise and the critical path is on the FP side. - // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) - // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) - /////////// - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -#endif -/* end file include/simdjson/arm64/bitmask.h */ -/* begin file include/simdjson/arm64/simd.h */ -#ifndef SIMDJSON_ARM64_SIMD_H -#define SIMDJSON_ARM64_SIMD_H - -#include - - -namespace simdjson { -namespace arm64 { -namespace { -namespace simd { - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -namespace { -// Start of private section with Visual Studio workaround - - -/** - * make_uint8x16_t initializes a SIMD register (uint8x16_t). - * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} - * is not recognized under Visual Studio! This is a workaround. - * Using a std::initializer_list as a parameter resulted in - * inefficient code. With the current approach, if the parameters are - * compile-time constants, - * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. - * You should not use this function except for compile-time constants: - * it is not efficient. 
- */ -simdjson_really_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, - uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, - uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, - uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { - // Doing a load like so end ups generating worse code. - // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, - // x9, x10,x11,x12,x13,x14,x15,x16}; - // return vld1q_u8(array); - uint8x16_t x{}; - // incredibly, Visual Studio does not allow x[0] = x1 - x = vsetq_lane_u8(x1, x, 0); - x = vsetq_lane_u8(x2, x, 1); - x = vsetq_lane_u8(x3, x, 2); - x = vsetq_lane_u8(x4, x, 3); - x = vsetq_lane_u8(x5, x, 4); - x = vsetq_lane_u8(x6, x, 5); - x = vsetq_lane_u8(x7, x, 6); - x = vsetq_lane_u8(x8, x, 7); - x = vsetq_lane_u8(x9, x, 8); - x = vsetq_lane_u8(x10, x, 9); - x = vsetq_lane_u8(x11, x, 10); - x = vsetq_lane_u8(x12, x, 11); - x = vsetq_lane_u8(x13, x, 12); - x = vsetq_lane_u8(x14, x, 13); - x = vsetq_lane_u8(x15, x, 14); - x = vsetq_lane_u8(x16, x, 15); - return x; -} - -simdjson_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, - uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { - uint8x8_t x{}; - x = vset_lane_u8(x1, x, 0); - x = vset_lane_u8(x2, x, 1); - x = vset_lane_u8(x3, x, 2); - x = vset_lane_u8(x4, x, 3); - x = vset_lane_u8(x5, x, 4); - x = vset_lane_u8(x6, x, 5); - x = vset_lane_u8(x7, x, 6); - x = vset_lane_u8(x8, x, 7); - return x; -} - -// We have to do the same work for make_int8x16_t -simdjson_really_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, - int8_t x5, int8_t x6, int8_t x7, int8_t x8, - int8_t x9, int8_t x10, int8_t x11, int8_t x12, - int8_t x13, int8_t x14, int8_t x15, int8_t x16) { - // Doing a load like so end ups generating worse code. 
- // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, - // x9, x10,x11,x12,x13,x14,x15,x16}; - // return vld1q_s8(array); - int8x16_t x{}; - // incredibly, Visual Studio does not allow x[0] = x1 - x = vsetq_lane_s8(x1, x, 0); - x = vsetq_lane_s8(x2, x, 1); - x = vsetq_lane_s8(x3, x, 2); - x = vsetq_lane_s8(x4, x, 3); - x = vsetq_lane_s8(x5, x, 4); - x = vsetq_lane_s8(x6, x, 5); - x = vsetq_lane_s8(x7, x, 6); - x = vsetq_lane_s8(x8, x, 7); - x = vsetq_lane_s8(x9, x, 8); - x = vsetq_lane_s8(x10, x, 9); - x = vsetq_lane_s8(x11, x, 10); - x = vsetq_lane_s8(x12, x, 11); - x = vsetq_lane_s8(x13, x, 12); - x = vsetq_lane_s8(x14, x, 13); - x = vsetq_lane_s8(x15, x, 14); - x = vsetq_lane_s8(x16, x, 15); - return x; -} - -// End of private section with Visual Studio workaround -} // namespace -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO - - - template - struct simd8; - - // - // Base class of simd8 and simd8, both of which use uint8x16_t internally. - // - template> - struct base_u8 { - uint8x16_t value; - static const int SIZE = sizeof(value); - - // Conversion from/to SIMD register - simdjson_really_inline base_u8(const uint8x16_t _value) : value(_value) {} - simdjson_really_inline operator const uint8x16_t&() const { return this->value; } - simdjson_really_inline operator uint8x16_t&() { return this->value; } - - // Bit operations - simdjson_really_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } - simdjson_really_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } - simdjson_really_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } - simdjson_really_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - simdjson_really_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } - 
simdjson_really_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } - - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } - - template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_u8(prev_chunk, *this, 16 - N); - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base_u8 { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - static simdjson_really_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } - - simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // False constructor - simdjson_really_inline simd8() : simd8(vdupq_n_u8(0)) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) : simd8(splat(_value)) {} - - // We return uint32_t instead of uint16_t because that seems to be more efficient for most - // purposes (cutting it down to uint16_t costs performance in some compilers). 
- simdjson_really_inline uint32_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); -#else - const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; -#endif - auto minput = *this & bit_mask; - uint8x16_t tmp = vpaddq_u8(minput, minput); - tmp = vpaddq_u8(tmp, tmp); - tmp = vpaddq_u8(tmp, tmp); - return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); - } - simdjson_really_inline bool any() const { return vmaxvq_u8(*this) != 0; } - }; - - // Unsigned bytes - template<> - struct simd8: base_u8 { - static simdjson_really_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } - static simdjson_really_inline uint8x16_t zero() { return vdupq_n_u8(0); } - static simdjson_really_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } - - simdjson_really_inline simd8(const uint8x16_t _value) : base_u8(_value) {} - // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} - // Array constructor - simdjson_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(make_uint8x16_t( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} -#else - simdjson_really_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) 
: simd8(uint8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Store to array - simdjson_really_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } - - // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } - - // Order-specific operations - simdjson_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); } - simdjson_really_inline uint8_t min_val() const { return vminvq_u8(*this); } - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 
other) const { return vcltq_u8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } - // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } - // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } - - // Bit-specific operations - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } - template - simdjson_really_inline simd8 shr() const { return vshrq_n_u8(*this, N); } - template - simdjson_really_inline simd8 shl() const { return vshlq_n_u8(*this, N); } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint16_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. 
- template - simdjson_really_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; - uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - shufmask = vaddq_u8(shufmask, inc); - // this is the version "nearly pruned" - uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); - vst1q_u8(reinterpret_cast(output), answer); - } - - // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a - // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
- template - simdjson_really_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { - using internal::thintable_epi8; - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); - uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); - // we increment by 0x08 the second half of the mask -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); -#else - uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; -#endif - compactmask2 = vadd_u8(compactmask2, inc); - // store each result (with the second store possibly overlapping the first) - vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); - vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); - } - - template - simdjson_really_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - - template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_u8(*this, simd8(original)); - } - }; - - // Signed bytes - template<> - struct simd8 { - int8x16_t value; - - static simdjson_really_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } - static simdjson_really_inline simd8 zero() { return vdupq_n_s8(0); } - static simdjson_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } - - // Conversion from/to SIMD register - simdjson_really_inline simd8(const int8x16_t _value) : value{_value} {} - simdjson_really_inline 
operator const int8x16_t&() const { return this->value; } - simdjson_really_inline operator int8x16_t&() { return this->value; } - - // Zero constructor - simdjson_really_inline simd8() : simd8(zero()) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(make_int8x16_t( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} -#else - simdjson_really_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(int8x16_t{ - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - }) {} -#endif - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Store to array - simdjson_really_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } - - // Explicit conversion to/from unsigned - // - // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. - // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 - // and relatively ugly and hard to read. 
-#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO - simdjson_really_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} -#endif - simdjson_really_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } - - // Math - simdjson_really_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } - - // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } - simdjson_really_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } - - template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { - return vextq_s8(prev_chunk, *this, 16 - N); - } - - // Perform a lookup assuming no value is larger than 16 - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return lookup_table.apply_lookup_16_to(*this); - } - template - simdjson_really_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, 
replace15 - )); - } - - template - simdjson_really_inline simd8 apply_lookup_16_to(const simd8 original) { - return vqtbl1q_s8(*this, simd8(original)); - } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } - - simdjson_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } - - - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { - uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); - // compute the prefix sum of the popcounts of each byte - uint64_t offsets = popcounts * 0x0101010101010101; - this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); - this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); - this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); - this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); 
- return offsets >> 56; - } - - simdjson_really_inline uint64_t to_bitmask() const { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - const uint8x16_t bit_mask = make_uint8x16_t( - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - ); -#else - const uint8x16_t bit_mask = { - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, - 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 - }; -#endif - // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. - uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); - uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); - sum0 = vpaddq_u8(sum0, sum1); - sum0 = vpaddq_u8(sum0, sum0); - return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); - } - - simdjson_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -#endif // SIMDJSON_ARM64_SIMD_H -/* end file include/simdjson/arm64/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace arm64 { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// 
returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. 
- return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace arm64 { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. 
-// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - 
-simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/arm64/stringparsing.h */ -#ifndef SIMDJSON_ARM64_STRINGPARSING_H -#define SIMDJSON_ARM64_STRINGPARSING_H - - -namespace simdjson { -namespace arm64 { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); - - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; 
therefore, we - // smash them together into a 64-byte mask and get the bitmask from there. - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace arm64 { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_ARM64_STRINGPARSING_H -/* end file include/simdjson/arm64/stringparsing.h */ -/* begin file include/simdjson/arm64/numberparsing.h */ -#ifndef SIMDJSON_ARM64_NUMBERPARSING_H -#define SIMDJSON_ARM64_NUMBERPARSING_H - -namespace simdjson { -namespace arm64 { -namespace { - -// we don't have SSE, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace arm64 { - -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - 
unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. 
How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. 
Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. 
- const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. 
Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). 
- int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. 
We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. 
- - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. 
- - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' 
- return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. - error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! 
- if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' 
== *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. 
- // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. 
- if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. 
- size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. 
- if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. 
- if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace arm64 -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_ARM64_NUMBERPARSING_H -/* end file include/simdjson/arm64/numberparsing.h */ -/* begin file include/simdjson/arm64/end.h */ -/* end file include/simdjson/arm64/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_ARM64 - -#endif // SIMDJSON_ARM64_H -/* end file include/simdjson/arm64.h */ -/* begin file include/simdjson/fallback.h */ -#ifndef SIMDJSON_FALLBACK_H -#define SIMDJSON_FALLBACK_H - - -#if SIMDJSON_IMPLEMENTATION_FALLBACK - -namespace simdjson { -/** - * Fallback implementation (runs on any machine). 
- */ -namespace fallback { -} // namespace fallback -} // namespace simdjson - -/* begin file include/simdjson/fallback/implementation.h */ -#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H -#define SIMDJSON_FALLBACK_IMPLEMENTATION_H - - -namespace simdjson { -namespace fallback { - -namespace { -using namespace simdjson; -using namespace simdjson::dom; -} - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation( - "fallback", - "Generic fallback implementation", - 0 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace fallback -} // namespace simdjson - -#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H -/* end file include/simdjson/fallback/implementation.h */ - -/* begin file include/simdjson/fallback/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "fallback" -// #define SIMDJSON_IMPLEMENTATION fallback -/* end file include/simdjson/fallback/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace fallback { - -// expectation: sizeof(open_container) = 64/8. 
-struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace fallback -} // namespace simdjson - -namespace simdjson { -namespace fallback { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline 
dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/fallback/bitmanipulation.h */ -#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H -#define SIMDJSON_FALLBACK_BITMANIPULATION_H - -#include - -namespace simdjson { -namespace fallback { -namespace { - -#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) -static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { - unsigned long x0 = (unsigned long)x, top, bottom; - _BitScanForward(&top, (unsigned long)(x >> 32)); - _BitScanForward(&bottom, x0); - *ret = x0 ? 
bottom : 32 + top; - return x != 0; -} -static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { - unsigned long x1 = (unsigned long)(x >> 32), top, bottom; - _BitScanReverse(&top, x1); - _BitScanReverse(&bottom, (unsigned long)x); - *ret = x1 ? top + 32 : bottom; - return x != 0; -} -#endif - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef _MSC_VER - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). - if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// _MSC_VER -} - -} // unnamed namespace -} // namespace fallback -} // namespace simdjson - -#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H -/* end file include/simdjson/fallback/bitmanipulation.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace fallback { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + 
src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. 
- return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. 
-// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - 
-simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/fallback/stringparsing.h */ -#ifndef SIMDJSON_FALLBACK_STRINGPARSING_H -#define SIMDJSON_FALLBACK_STRINGPARSING_H - - -namespace simdjson { -namespace fallback { -namespace { - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 1; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { return c == '"'; } - simdjson_really_inline bool has_backslash() { return c == '\\'; } - simdjson_really_inline int quote_index() { return c == '"' ? 0 : 1; } - simdjson_really_inline int backslash_index() { return c == '\\' ? 
0 : 1; } - - uint8_t c; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // store to dest unconditionally - we can overwrite the bits we don't like later - dst[0] = src[0]; - return { src[0] }; -} - -} // unnamed namespace -} // namespace fallback -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace fallback { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_FALLBACK_STRINGPARSING_H -/* end file include/simdjson/fallback/stringparsing.h */ -/* begin file include/simdjson/fallback/numberparsing.h */ -#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_H -#define SIMDJSON_FALLBACK_NUMBERPARSING_H - -#ifdef JSON_TEST_NUMBERS // for unit testing -void found_invalid_number(const uint8_t *buf); -void found_integer(int64_t result, const uint8_t *buf); -void found_unsigned_integer(uint64_t result, const uint8_t *buf); -void found_float(double result, const uint8_t *buf); -#endif - -namespace simdjson { -namespace fallback { -namespace { -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) { - uint64_t val; - memcpy(&val, chars, sizeof(uint64_t)); - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - return parse_eight_digits_unrolled(reinterpret_cast(chars)); -} - -} // unnamed namespace -} // namespace fallback -} // namespace 
simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace fallback { - -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. 
This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. 
- // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. 
- // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. 
Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. 
- /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. 
- // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. 
-static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. 
- - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' 
- return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. - error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! 
- if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' 
== *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. 
- // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. 
- if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. 
- size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. 
- if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. 
- if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace fallback -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_FALLBACK_NUMBERPARSING_H -/* end file include/simdjson/fallback/numberparsing.h */ -/* begin file include/simdjson/fallback/end.h */ -/* end file include/simdjson/fallback/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_FALLBACK -#endif // SIMDJSON_FALLBACK_H -/* end file include/simdjson/fallback.h */ -/* begin file include/simdjson/icelake.h */ -#ifndef SIMDJSON_ICELAKE_H -#define SIMDJSON_ICELAKE_H - - -#if SIMDJSON_IMPLEMENTATION_ICELAKE - -#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -#define SIMDJSON_TARGET_ICELAKE -#define SIMDJSON_UNTARGET_ICELAKE -#else -#define SIMDJSON_TARGET_ICELAKE 
SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt") -#define SIMDJSON_UNTARGET_ICELAKE SIMDJSON_UNTARGET_REGION -#endif - -namespace simdjson { -/** - * Implementation for Icelake (Intel AVX512). - */ -namespace icelake { -} // namespace icelake -} // namespace simdjson - -// -// These two need to be included outside SIMDJSON_TARGET_ICELAKE -// -/* begin file include/simdjson/icelake/implementation.h */ -#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H -#define SIMDJSON_ICELAKE_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE -namespace simdjson { -namespace icelake { - -using namespace simdjson; - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation( - "icelake", - "Intel/AMD AVX512", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H -/* end file include/simdjson/icelake/implementation.h */ -/* begin file include/simdjson/icelake/intrinsics.h */ -#ifndef SIMDJSON_ICELAKE_INTRINSICS_H -#define SIMDJSON_ICELAKE_INTRINSICS_H - - -#ifdef SIMDJSON_VISUAL_STUDIO -// 
under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// Important: we need the AVX-512 headers: -#include -#include -#include -#include -#include -#include -#include -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. 
-#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO - -#endif // SIMDJSON_ICELAKE_INTRINSICS_H -/* end file include/simdjson/icelake/intrinsics.h */ - -// -// The rest need to be inside the region -// -/* begin file include/simdjson/icelake/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "icelake" -// #define SIMDJSON_IMPLEMENTATION icelake -SIMDJSON_TARGET_ICELAKE -/* end file include/simdjson/icelake/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace icelake { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused 
error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace icelake -} // namespace simdjson - -namespace simdjson { -namespace icelake { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/icelake/bitmanipulation.h */ -#ifndef 
SIMDJSON_ICELAKE_BITMANIPULATION_H -#define SIMDJSON_ICELAKE_BITMANIPULATION_H - -namespace simdjson { -namespace icelake { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H -/* end file include/simdjson/icelake/bitmanipulation.h */ -/* begin file 
include/simdjson/icelake/bitmask.h */ -#ifndef SIMDJSON_ICELAKE_BITMASK_H -#define SIMDJSON_ICELAKE_BITMASK_H - -namespace simdjson { -namespace icelake { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_BITMASK_H -/* end file include/simdjson/icelake/bitmask.h */ -/* begin file include/simdjson/icelake/simd.h */ -#ifndef SIMDJSON_ICELAKE_SIMD_H -#define SIMDJSON_ICELAKE_SIMD_H - - - - -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ == 8 -#define SIMDJSON_GCC8 1 -#endif // __GNUC__ == 8 -#endif // defined(__GNUC__) && !defined(__clang__) - -#if SIMDJSON_GCC8 -/** - * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
- */ -inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { - return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), - uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), - uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), - uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), - uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), - uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), - uint64_t(a55) + 
(uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), - uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); -} -#endif // SIMDJSON_GCC8 - - - -namespace simdjson { -namespace icelake { -namespace { -namespace simd { - - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m512i value; - - // Zero constructor - simdjson_really_inline base() : value{__m512i()} {} - - // Conversion from SIMD register - simdjson_really_inline base(const __m512i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_really_inline operator const __m512i&() const { return this->value; } - simdjson_really_inline operator __m512i&() { return this->value; } - - // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; - - // Forward-declared so they can be used by splat and friends. 
- template - struct simd8; - - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; - - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m512i _value) : base>(_value) {} - - simdjson_really_inline uint64_t operator==(const simd8 other) const { return _mm512_cmpeq_epi8_mask(*this, other); } - - static const int SIZE = sizeof(base::value); - - template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { -#if SIMDJSON_GCC8 - // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) - constexpr int shift = 16 - N; - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); -#else - return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), 16 - N); -#endif - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } - - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m512i _value) : base8(_value) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} - simdjson_really_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } - }; - - template - struct base8_numeric: base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm512_setzero_si512(); } - static simdjson_really_inline simd8 load(const T values[64]) { - return _mm512_loadu_si512(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( - T v0, T 
v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m512i _value) : base8(_value) {} - - // Store to array - simdjson_really_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm512_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. 
- // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_really_inline void compress(uint64_t mask, L * output) const { - _mm512_mask_compressstoreu_epi8 (output,~mask,*this); - } - - template - simdjson_really_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; - - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, - int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, - int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, - int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, - int8_t v56, int8_t v57, 
int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } - - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m512i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline 
simd8(const uint8_t values[64]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, - uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, - uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, - uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, - uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 - ) : simd8(_mm512_set_epi8( - v63, v62, v61, v60, v59, v58, v57, v56, - v55, v54, v53, v52, v51, v50, v49, v48, - v47, v46, v45, v44, v43, v42, v41, v40, - v39, v38, v37, v36, v35, v34, v33, v32, - v31, v30, v29, v28, v27, v26, v25, v24, - v23, v22, v21, v20, v19, v18, v17, v16, - v15, v14, v13, v12, v11, v10, v9, v8, - v7, v6, v5, v4, v3, v2, v1, v0 - )) {} - - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - 
simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - - simdjson_really_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { - return !_mm512_test_epi8_mask(*this, *this); - } - simdjson_really_inline bool 
any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_really_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_really_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_really_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_really_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} - - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(mask, output); - return 64 - count_ones(mask); - } - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - } - - simdjson_really_inline simd8 reduce_or() const { - return this->chunks[0]; - } - - simdjson_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask - ); - } - - simdjson_really_inline uint64_t eq(const T m) 
const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] == mask; - } - - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { - return this->chunks[0] == other.chunks[0]; - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return this->chunks[0] <= mask; - } - }; // struct simd8x64 - -} // namespace simd - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#endif // SIMDJSON_ICELAKE_SIMD_H -/* end file include/simdjson/icelake/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace icelake { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function 
looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - 
answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
-simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/icelake/stringparsing.h */ -#ifndef SIMDJSON_ICELAKE_STRINGPARSING_H -#define SIMDJSON_ICELAKE_STRINGPARSING_H - - 
-namespace simdjson { -namespace icelake { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint64_t bs_bits; - uint64_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast(v == '\\'), // bs_bits - static_cast(v == '"'), // quote_bits - }; -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace icelake { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. 
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. 
If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. 
I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_ICELAKE_STRINGPARSING_H -/* end file include/simdjson/icelake/stringparsing.h */ -/* begin file include/simdjson/icelake/numberparsing.h */ -#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_H -#define SIMDJSON_ICELAKE_NUMBERPARSING_H - -namespace simdjson { -namespace icelake { -namespace { - -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. 
- const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -} // unnamed namespace -} // namespace icelake -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace icelake { - -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). 
- // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. 
- // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. 
- simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. 
- // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. 
- // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
- if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
- // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
- // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. 
- error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. 
- if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. 
-template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 
19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. 
-// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. 
- // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. 
- if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? 
(~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace icelake -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_ICELAKE_NUMBERPARSING_H -/* end file include/simdjson/icelake/numberparsing.h */ -/* begin file include/simdjson/icelake/end.h */ -SIMDJSON_UNTARGET_ICELAKE -/* end file include/simdjson/icelake/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_ICELAKE -#endif // SIMDJSON_ICELAKE_H -/* end file include/simdjson/icelake.h */ -/* begin file include/simdjson/haswell.h */ -#ifndef SIMDJSON_HASWELL_H -#define SIMDJSON_HASWELL_H - - -#if SIMDJSON_IMPLEMENTATION_HASWELL - -#if SIMDJSON_CAN_ALWAYS_RUN_HASWELL -#define SIMDJSON_TARGET_HASWELL -#define SIMDJSON_UNTARGET_HASWELL -#else -#define SIMDJSON_TARGET_HASWELL 
SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt") -#define SIMDJSON_UNTARGET_HASWELL SIMDJSON_UNTARGET_REGION -#endif - -namespace simdjson { -/** - * Implementation for Haswell (Intel AVX2). - */ -namespace haswell { -} // namespace haswell -} // namespace simdjson - -// -// These two need to be included outside SIMDJSON_TARGET_HASWELL -// -/* begin file include/simdjson/haswell/implementation.h */ -#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H -#define SIMDJSON_HASWELL_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL -namespace simdjson { -namespace haswell { - -using namespace simdjson; - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation( - "haswell", - "Intel/AMD AVX2", - internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 - ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H -/* end file include/simdjson/haswell/implementation.h */ -/* begin file include/simdjson/haswell/intrinsics.h */ -#ifndef SIMDJSON_HASWELL_INTRINSICS_H -#define SIMDJSON_HASWELL_INTRINSICS_H - - -#ifdef SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. 
Instead you should either include intrin.h - * or x86intrin.h. However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - * e.g., if __AVX2__ is set... in turn, we normally set these - * macros by compiling against the corresponding architecture - * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole - * software with these advanced instructions. In simdjson, we - * want to compile the whole program for a generic target, - * and only target our specific kernels. As a workaround, - * we directly include the needed headers. These headers would - * normally guard against such usage, but we carefully included - * (or ) before, so the headers - * are fooled. - */ -#include // for _blsr_u64 -#include // for __lzcnt64 -#include // for most things (AVX2, AVX512, _popcnt64) -#include -#include -#include -#include -#include // for _mm_clmulepi64_si128 -// unfortunately, we may not get _blsr_u64, but, thankfully, clang -// has it as a macro. -#ifndef _blsr_u64 -// we roll our own -#define _blsr_u64(n) ((n - 1) & n) -#endif // _blsr_u64 -#endif // SIMDJSON_CLANG_VISUAL_STUDIO - -#endif // SIMDJSON_HASWELL_INTRINSICS_H -/* end file include/simdjson/haswell/intrinsics.h */ - -// -// The rest need to be inside the region -// -/* begin file include/simdjson/haswell/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "haswell" -// #define SIMDJSON_IMPLEMENTATION haswell -SIMDJSON_TARGET_HASWELL -/* end file include/simdjson/haswell/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace haswell { - -// expectation: sizeof(open_container) = 64/8. 
-struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace haswell -} // namespace simdjson - -namespace simdjson { -namespace haswell { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/haswell/bitmanipulation.h */ -#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H -#define SIMDJSON_HASWELL_BITMANIPULATION_H - -namespace simdjson { -namespace haswell { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. 
-SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - return (int)_tzcnt_u64(input_num); -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - //////// - // You might expect the next line to be equivalent to - // return (int)_tzcnt_u64(input_num); - // but the generated code differs and might be less efficient? - //////// - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return _blsr_u64(input_num); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { - return int(_lzcnt_u64(input_num)); -} - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_BITMANIPULATION_H -/* end file include/simdjson/haswell/bitmanipulation.h */ -/* begin file include/simdjson/haswell/bitmask.h */ -#ifndef SIMDJSON_HASWELL_BITMASK_H -#define SIMDJSON_HASWELL_BITMASK_H - -namespace simdjson { -namespace haswell { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
-// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processor supporting avx2 - // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_BITMASK_H -/* end file include/simdjson/haswell/bitmask.h */ -/* begin file include/simdjson/haswell/simd.h */ -#ifndef SIMDJSON_HASWELL_SIMD_H -#define SIMDJSON_HASWELL_SIMD_H - - -namespace simdjson { -namespace haswell { -namespace { -namespace simd { - - // Forward-declared so they can be used by splat and friends. - template - struct base { - __m256i value; - - // Zero constructor - simdjson_really_inline base() : value{__m256i()} {} - - // Conversion from SIMD register - simdjson_really_inline base(const __m256i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_really_inline operator const __m256i&() const { return this->value; } - simdjson_really_inline operator __m256i&() { return this->value; } - - // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - 
simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; - - // Forward-declared so they can be used by splat and friends. - template - struct simd8; - - template> - struct base8: base> { - typedef uint32_t bitmask_t; - typedef uint64_t bitmask2_t; - - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m256i _value) : base>(_value) {} - - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } - - static const int SIZE = sizeof(base::value); - - template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } - - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m256i _value) : base8(_value) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdjson_really_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } - }; - - template - struct base8_numeric: base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm256_setzero_si256(); } - static simdjson_really_inline simd8 load(const T values[32]) { - return _mm256_loadu_si256(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T 
v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m256i _value) : base8(_value) {} - - // Store to array - simdjson_really_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm256_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. 
- template - simdjson_really_inline void compress(uint32_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in four steps, first 8 bytes and then second 8 bytes... - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits - uint8_t mask3 = uint8_t(mask >> 16); // ... - uint8_t mask4 = uint8_t(mask >> 24); // ... - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], - thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask and so forth - shufmask = - _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, - 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); - // we still need to put the pieces back together. - // we compute the popcount of the first words: - int pop1 = BitsSetTable256mul2[mask1]; - int pop3 = BitsSetTable256mul2[mask3]; - - // then load the corresponding mask - // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. - __m256i v256 = _mm256_castsi128_si256( - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); - __m256i compactmask = _mm256_insertf128_si256(v256, - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); - __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); - // We just need to write out the result. 
- // This is the tricky bit that is hard to do - // if we want to return a SIMD register, since there - // is no single-instruction approach to recombine - // the two 128-bit lanes with an offset. - __m128i v128; - v128 = _mm256_castsi256_si128(almostthere); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); - v128 = _mm256_extractf128_si256(almostthere, 1); - _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); - } - - template - simdjson_really_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; - - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, - int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, - int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - )) {} - // 
Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m256i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, - uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, - uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 - ) : simd8(_mm256_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v16,v17,v18,v19,v20,v21,v22,v23, - v24,v25,v26,v27,v28,v29,v30,v31 - 
)) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15, - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & 
bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_really_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_really_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } - // Get one of the bits and make a bitmask out of it. - // e.g. 
value.get_bit<7>() gets the high bit - template - simdjson_really_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} - - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { - uint32_t mask1 = uint32_t(mask); - uint32_t mask2 = uint32_t(mask >> 32); - this->chunks[0].compress(mask1, output); - this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); - return 64 - count_ones(mask); - } - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - } - - simdjson_really_inline uint64_t to_bitmask() const { - uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r_hi = this->chunks[1].to_bitmask(); - return r_lo | (r_hi << 32); - } - - simdjson_really_inline simd8 reduce_or() const { - return this->chunks[0] | this->chunks[1]; - } - - simdjson_really_inline simd8x64 bit_or(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] | mask, - this->chunks[1] | mask - ); - } - - simdjson_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask - ).to_bitmask(); - } - - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - 
this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1] - ).to_bitmask(); - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 - -} // namespace simd - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#endif // SIMDJSON_HASWELL_SIMD_H -/* end file include/simdjson/haswell/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace haswell { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. 
-// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = 
__umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
-simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/haswell/stringparsing.h */ -#ifndef SIMDJSON_HASWELL_STRINGPARSING_H -#define SIMDJSON_HASWELL_STRINGPARSING_H - - 
-namespace simdjson { -namespace haswell { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 15 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v(src); - // store to dest unconditionally - we can overwrite the bits we don't like later - v.store(dst); - return { - static_cast((v == '\\').to_bitmask()), // bs_bits - static_cast((v == '"').to_bitmask()), // quote_bits - }; -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace haswell { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. 
If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. - if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. 
I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. */ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_HASWELL_STRINGPARSING_H -/* end file include/simdjson/haswell/stringparsing.h */ -/* begin file include/simdjson/haswell/numberparsing.h */ -#ifndef SIMDJSON_HASWELL_NUMBERPARSING_H -#define SIMDJSON_HASWELL_NUMBERPARSING_H - -namespace simdjson { -namespace haswell { -namespace { - -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. 
- const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -} // unnamed namespace -} // namespace haswell -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace haswell { - -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). 
- // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. 
- // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. 
- simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. 
- // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. 
- // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
- if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
- // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
- // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. 
- error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. 
- if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. 
-template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 
19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. 
-// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. 
- // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. 
- if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? 
(~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace haswell -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_HASWELL_NUMBERPARSING_H -/* end file include/simdjson/haswell/numberparsing.h */ -/* begin file include/simdjson/haswell/end.h */ -SIMDJSON_UNTARGET_HASWELL -/* end file include/simdjson/haswell/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_HASWELL -#endif // SIMDJSON_HASWELL_COMMON_H -/* end file include/simdjson/haswell.h */ -/* begin file include/simdjson/ppc64.h */ -#ifndef SIMDJSON_PPC64_H -#define SIMDJSON_PPC64_H - - -#if SIMDJSON_IMPLEMENTATION_PPC64 - -namespace simdjson { -/** - * Implementation for ALTIVEC (PPC64). 
- */ -namespace ppc64 { -} // namespace ppc64 -} // namespace simdjson - -/* begin file include/simdjson/ppc64/implementation.h */ -#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H -#define SIMDJSON_PPC64_IMPLEMENTATION_H - - -namespace simdjson { -namespace ppc64 { - -namespace { -using namespace simdjson; -using namespace simdjson::dom; -} // namespace - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() - : simdjson::implementation("ppc64", "PPC64 ALTIVEC", - internal::instruction_set::ALTIVEC) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, size_t max_length, - std::unique_ptr &dst) - const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, - uint8_t *dst, - size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, - size_t len) const noexcept final; -}; - -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_PPC64_IMPLEMENTATION_H -/* end file include/simdjson/ppc64/implementation.h */ - -/* begin file include/simdjson/ppc64/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "ppc64" -// #define SIMDJSON_IMPLEMENTATION ppc64 -/* end file include/simdjson/ppc64/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace ppc64 { - -// expectation: sizeof(open_container) = 64/8. 
-struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace ppc64 -} // namespace simdjson - -namespace simdjson { -namespace ppc64 { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation 
&&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/ppc64/intrinsics.h */ -#ifndef SIMDJSON_PPC64_INTRINSICS_H -#define SIMDJSON_PPC64_INTRINSICS_H - - -// This should be the correct header whether -// you use visual studio or other compilers. -#include - -// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
-#ifdef bool -#undef bool -#endif - -#ifdef vector -#undef vector -#endif - -#endif // SIMDJSON_PPC64_INTRINSICS_H -/* end file include/simdjson/ppc64/intrinsics.h */ -/* begin file include/simdjson/ppc64/bitmanipulation.h */ -#ifndef SIMDJSON_PPC64_BITMANIPULATION_H -#define SIMDJSON_PPC64_BITMANIPULATION_H - -namespace simdjson { -namespace ppc64 { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num - 1); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). 
- if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline int count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num); // Visual Studio wants two underscores -} -#else -simdjson_really_inline int count_ones(uint64_t input_num) { - return __builtin_popcountll(input_num); -} -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - *result = value1 + value2; - return *result < value1; -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_PPC64_BITMANIPULATION_H -/* end file include/simdjson/ppc64/bitmanipulation.h */ -/* begin file include/simdjson/ppc64/bitmask.h */ -#ifndef SIMDJSON_PPC64_BITMASK_H -#define SIMDJSON_PPC64_BITMASK_H - -namespace simdjson { -namespace ppc64 { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is -// encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(uint64_t bitmask) { - // You can use the version below, however gcc sometimes miscompiles - // vec_pmsum_be, it happens somewhere around between 8 and 9th version. - // The performance boost was not noticeable, falling back to a usual - // implementation. - // __vector unsigned long long all_ones = {~0ull, ~0ull}; - // __vector unsigned long long mask = {bitmask, 0}; - // // Clang and GCC return different values for pmsum for ull so cast it to one. - // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. 
- // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#endif -/* end file include/simdjson/ppc64/bitmask.h */ -/* begin file include/simdjson/ppc64/simd.h */ -#ifndef SIMDJSON_PPC64_SIMD_H -#define SIMDJSON_PPC64_SIMD_H - -#include - -namespace simdjson { -namespace ppc64 { -namespace { -namespace simd { - -using __m128i = __vector unsigned char; - -template struct base { - __m128i value; - - // Zero constructor - simdjson_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_really_inline operator const __m128i &() const { - return this->value; - } - simdjson_really_inline operator __m128i &() { return this->value; } - - // Bit operations - simdjson_really_inline Child operator|(const Child other) const { - return vec_or(this->value, (__m128i)other); - } - simdjson_really_inline Child operator&(const Child other) const { - return vec_and(this->value, (__m128i)other); - } - simdjson_really_inline Child operator^(const Child other) const { - return vec_xor(this->value, (__m128i)other); - } - simdjson_really_inline Child bit_andnot(const Child other) const { - return vec_andc(this->value, (__m128i)other); - } - simdjson_really_inline Child &operator|=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast | other; - return *this_cast; - } - simdjson_really_inline Child &operator&=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast 
& other; - return *this_cast; - } - simdjson_really_inline Child &operator^=(const Child other) { - auto this_cast = static_cast(this); - *this_cast = *this_cast ^ other; - return *this_cast; - } -}; - -// Forward-declared so they can be used by splat and friends. -template struct simd8; - -template > -struct base8 : base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} - - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { - return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); - } - - static const int SIZE = sizeof(base>::value); - - template - simdjson_really_inline simd8 prev(simd8 prev_chunk) const { - __m128i chunk = this->value; -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve(this->value); - prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); -#endif - chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); -#ifdef __LITTLE_ENDIAN__ - chunk = (__m128i)vec_reve((__m128i)chunk); -#endif - return chunk; - } -}; - -// SIMD byte mask type (returned by things like eq and gt) -template <> struct simd8 : base8 { - static simdjson_really_inline simd8 splat(bool _value) { - return (__m128i)vec_splats((unsigned char)(-(!!_value))); - } - - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) - : base8(_value) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) - : base8(splat(_value)) {} - - simdjson_really_inline int to_bitmask() const { - __vector unsigned long long result; - const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, - 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; - - result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, - (__m128i)perm_mask)); -#ifdef __LITTLE_ENDIAN__ - return static_cast(result[1]); -#else - return static_cast(result[0]); -#endif - } - 
simdjson_really_inline bool any() const { - return !vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_really_inline simd8 operator~() const { - return this->value ^ (__m128i)splat(true); - } -}; - -template struct base8_numeric : base8 { - static simdjson_really_inline simd8 splat(T value) { - (void)value; - return (__m128i)vec_splats(value); - } - static simdjson_really_inline simd8 zero() { return splat(0); } - static simdjson_really_inline simd8 load(const T values[16]) { - return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, - T v5, T v6, T v7, T v8, T v9, - T v10, T v11, T v12, T v13, - T v14, T v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, - v14, v15); - } - - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i _value) - : base8(_value) {} - - // Store to array - simdjson_really_inline void store(T dst[16]) const { - vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); - } - - // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { - return (__m128i)((__m128i)this->value + (__m128i)other); - } - simdjson_really_inline simd8 operator-(const simd8 other) const { - return (__m128i)((__m128i)this->value - (__m128i)other); - } - simdjson_really_inline simd8 &operator+=(const simd8 other) { - *this = *this + other; - return *static_cast *>(this); - } - simdjson_really_inline simd8 &operator-=(const simd8 other) { - *this = *this - other; - return *static_cast *>(this); - } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior - // for out of range values) - template - 
simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted - // as a bitset). Passing a 0 value for mask would be equivalent to writing out - // every byte to output. Only the first 16 - count_ones(mask) bytes of the - // result are significant but 16 bytes get written. Design consideration: it - // seems like a function with the signature simd8 compress(uint32_t mask) - // would be sensible, but the AVX ISA makes this kind of approach difficult. - template - simdjson_really_inline void compress(uint16_t mask, L *output) const { - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - using internal::thintable_epi8; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. -#ifdef __LITTLE_ENDIAN__ - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask1], thintable_epi8[mask2]}; -#else - __m128i shufmask = (__m128i)(__vector unsigned long long){ - thintable_epi8[mask2], thintable_epi8[mask1]}; - shufmask = (__m128i)vec_reve((__m128i)shufmask); -#endif - // we increment by 0x08 the second half of the mask - shufmask = ((__m128i)shufmask) + - ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); - - // this is the version "nearly pruned" - __m128i pruned = vec_perm(this->value, this->value, shufmask); - // we still need to put the two halves together. 
- // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. - __m128i compactmask = - vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); - vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); - } - - template - simdjson_really_inline simd8 - lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, - L replace5, L replace6, L replace7, L replace8, L replace9, - L replace10, L replace11, L replace12, L replace13, L replace14, - L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, replace4, replace5, replace6, - replace7, replace8, replace9, replace10, replace11, replace12, - replace13, replace14, replace15)); - } -}; - -// Signed bytes -template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, - int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) - : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10, v11, v12, v13, v14, - v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 - repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, - int8_t 
v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, - int8_t v12, int8_t v13, int8_t v14, int8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Order-sensitive comparisons - simdjson_really_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - operator>(const simd8 other) const { - return (__m128i)vec_cmpgt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } - simdjson_really_inline simd8 - operator<(const simd8 other) const { - return (__m128i)vec_cmplt((__vector signed char)this->value, - (__vector signed char)(__m128i)other); - } -}; - -// Unsigned bytes -template <> struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) - : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const uint8_t *values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline - simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, - uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, - uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) - : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15}) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 - repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, - uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, - uint8_t v10, uint8_t v11, uint8_t 
v12, uint8_t v13, uint8_t v14, - uint8_t v15) { - return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, - v13, v14, v15); - } - - // Saturated math - simdjson_really_inline simd8 - saturating_add(const simd8 other) const { - return (__m128i)vec_adds(this->value, (__m128i)other); - } - simdjson_really_inline simd8 - saturating_sub(const simd8 other) const { - return (__m128i)vec_subs(this->value, (__m128i)other); - } - - // Order-specific operations - simdjson_really_inline simd8 - max_val(const simd8 other) const { - return (__m128i)vec_max(this->value, (__m128i)other); - } - simdjson_really_inline simd8 - min_val(const simd8 other) const { - return (__m128i)vec_min(this->value, (__m128i)other); - } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 - gt_bits(const simd8 other) const { - return this->saturating_sub(other); - } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 - lt_bits(const simd8 other) const { - return other.saturating_sub(*this); - } - simdjson_really_inline simd8 - operator<=(const simd8 other) const { - return other.max_val(*this) == other; - } - simdjson_really_inline simd8 - operator>=(const simd8 other) const { - return other.min_val(*this) == other; - } - simdjson_really_inline simd8 - operator>(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - simdjson_really_inline simd8 - operator<(const simd8 other) const { - return this->gt_bits(other).any_bits_set(); - } - - // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { - return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); - } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { - return (*this & bits).bits_not_set(); - } - simdjson_really_inline simd8 any_bits_set() const { - return ~this->bits_not_set(); - } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { - 
return ~this->bits_not_set(bits); - } - simdjson_really_inline bool bits_not_set_anywhere() const { - return vec_all_eq(this->value, (__m128i)vec_splats(0)); - } - simdjson_really_inline bool any_bits_set_anywhere() const { - return !bits_not_set_anywhere(); - } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { - return vec_all_eq(vec_and(this->value, (__m128i)bits), - (__m128i)vec_splats(0)); - } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { - return !bits_not_set_anywhere(bits); - } - template simdjson_really_inline simd8 shr() const { - return simd8( - (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); - } - template simdjson_really_inline simd8 shl() const { - return simd8( - (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); - } -}; - -template struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, - "PPC64 kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64 &o) = delete; // no copy allowed - simd8x64 & - operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, - const simd8 chunk2, const simd8 chunk3) - : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) - : chunks{simd8::load(ptr), simd8::load(ptr + 16), - simd8::load(ptr + 32), simd8::load(ptr + 48)} {} - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr + sizeof(simd8) * 0); - this->chunks[1].store(ptr + sizeof(simd8) * 1); - this->chunks[2].store(ptr + sizeof(simd8) * 2); - this->chunks[3].store(ptr + sizeof(simd8) * 3); - } - - simdjson_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | - (this->chunks[2] | this->chunks[3]); - } - - simdjson_really_inline uint64_t compress(uint64_t 
mask, T *output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), - output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), - output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), - output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } - - simdjson_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); - uint64_t r1 = this->chunks[1].to_bitmask(); - uint64_t r2 = this->chunks[2].to_bitmask(); - uint64_t r3 = this->chunks[3].to_bitmask(); - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdjson_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, - this->chunks[2] == mask, this->chunks[3] == mask) - .to_bitmask(); - } - - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64(this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3]) - .to_bitmask(); - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, - this->chunks[2] <= mask, this->chunks[3] <= mask) - .to_bitmask(); - } -}; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#endif // SIMDJSON_PPC64_SIMD_INPUT_H -/* end file include/simdjson/ppc64/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace ppc64 { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return 
internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... 
- //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. - return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r 
>> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace ppc64 { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. -// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
-simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/ppc64/stringparsing.h */ -#ifndef SIMDJSON_PPC64_STRINGPARSING_H -#define SIMDJSON_PPC64_STRINGPARSING_H - - -namespace 
simdjson { -namespace ppc64 { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote - copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { - return ((bs_bits - 1) & quote_bits) != 0; - } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { - return trailing_zeroes(quote_bits); - } - simdjson_really_inline int backslash_index() { - return trailing_zeroes(bs_bits); - } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote -backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), - "backslash and quote finder must process fewer than " - "SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + sizeof(v0)); - v0.store(dst); - v1.store(dst + sizeof(v0)); - - // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on - // PPC; therefore, we smash them together into a 64-byte mask and get the - // bitmask from there. 
- uint64_t bs_and_quote = - simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace ppc64 { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_PPC64_STRINGPARSING_H -/* end file include/simdjson/ppc64/stringparsing.h */ -/* begin file include/simdjson/ppc64/numberparsing.h */ -#ifndef SIMDJSON_PPC64_NUMBERPARSING_H -#define SIMDJSON_PPC64_NUMBERPARSING_H - -#if defined(__linux__) -#include -#elif defined(__FreeBSD__) -#include -#endif - -namespace simdjson { -namespace ppc64 { -namespace { - -// we don't have appropriate instructions, so let us use a scalar function -// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ -static simdjson_really_inline uint32_t -parse_eight_digits_unrolled(const uint8_t *chars) { - uint64_t val; - std::memcpy(&val, chars, sizeof(uint64_t)); -#ifdef __BIG_ENDIAN__ -#if defined(__linux__) - val = bswap_64(val); -#elif defined(__FreeBSD__) - val = bswap64(val); -#endif -#endif - val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace ppc64 { 
- -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). 
-// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). - // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. 
- // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. - // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. 
- // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. 
Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. 
- /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. - // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. 
- // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. - if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. 
-static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. 
- const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. - // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. 
- - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. - // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' 
- return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. - error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! 
- if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. - if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const 
uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. -template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' 
== *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. 
- // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. -// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. 
- if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. 
- size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. 
- if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. 
- if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace ppc64 -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_PPC64_NUMBERPARSING_H -/* end file include/simdjson/ppc64/numberparsing.h */ -/* begin file include/simdjson/ppc64/end.h */ -/* end file include/simdjson/ppc64/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_PPC64 - -#endif // SIMDJSON_PPC64_H -/* end file include/simdjson/ppc64.h */ -/* begin file include/simdjson/westmere.h */ -#ifndef SIMDJSON_WESTMERE_H -#define SIMDJSON_WESTMERE_H - - -#if SIMDJSON_IMPLEMENTATION_WESTMERE - -#if SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -#define SIMDJSON_TARGET_WESTMERE -#define SIMDJSON_UNTARGET_WESTMERE -#else -#define SIMDJSON_TARGET_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul") -#define 
SIMDJSON_UNTARGET_WESTMERE SIMDJSON_UNTARGET_REGION -#endif - -namespace simdjson { -/** - * Implementation for Westmere (Intel SSE4.2). - */ -namespace westmere { -} // namespace westmere -} // namespace simdjson - -// -// These two need to be included outside SIMDJSON_TARGET_WESTMERE -// -/* begin file include/simdjson/westmere/implementation.h */ -#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H -#define SIMDJSON_WESTMERE_IMPLEMENTATION_H - - -// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE -namespace simdjson { -namespace westmere { - -namespace { -using namespace simdjson; -using namespace simdjson::dom; -} - -class implementation final : public simdjson::implementation { -public: - simdjson_really_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} - simdjson_warn_unused error_code create_dom_parser_implementation( - size_t capacity, - size_t max_length, - std::unique_ptr& dst - ) const noexcept final; - simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; - simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; -}; - -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H -/* end file include/simdjson/westmere/implementation.h */ -/* begin file include/simdjson/westmere/intrinsics.h */ -#ifndef SIMDJSON_WESTMERE_INTRINSICS_H -#define SIMDJSON_WESTMERE_INTRINSICS_H - -#ifdef SIMDJSON_VISUAL_STUDIO -// under clang within visual studio, this will include -#include // visual studio or clang -#else -#include // elsewhere -#endif // SIMDJSON_VISUAL_STUDIO - - -#ifdef SIMDJSON_CLANG_VISUAL_STUDIO -/** - * You are not supposed, normally, to include these - * headers directly. Instead you should either include intrin.h - * or x86intrin.h. 
However, when compiling with clang - * under Windows (i.e., when _MSC_VER is set), these headers - * only get included *if* the corresponding features are detected - * from macros: - */ -#include // for _mm_alignr_epi8 -#include // for _mm_clmulepi64_si128 -#endif - - - -#endif // SIMDJSON_WESTMERE_INTRINSICS_H -/* end file include/simdjson/westmere/intrinsics.h */ - -// -// The rest need to be inside the region -// -/* begin file include/simdjson/westmere/begin.h */ -// redefining SIMDJSON_IMPLEMENTATION to "westmere" -// #define SIMDJSON_IMPLEMENTATION westmere -SIMDJSON_TARGET_WESTMERE -/* end file include/simdjson/westmere/begin.h */ - -// Declarations -/* begin file include/simdjson/generic/dom_parser_implementation.h */ - -namespace simdjson { -namespace westmere { - -// expectation: sizeof(open_container) = 64/8. -struct open_container { - uint32_t tape_index; // where, on the tape, does the scope ([,{) begins - uint32_t count; // how many elements in the scope -}; // struct open_container - -static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); - -class dom_parser_implementation final : public internal::dom_parser_implementation { -public: - /** Tape location of each open { or [ */ - std::unique_ptr open_containers{}; - /** Whether each open container is a [ or { */ - std::unique_ptr is_array{}; - /** Buffer passed to stage 1 */ - const uint8_t *buf{}; - /** Length passed to stage 1 */ - size_t len{0}; - /** Document passed to stage 2 */ - dom::document *doc{}; - - inline dom_parser_implementation() noexcept; - inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; - inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; - dom_parser_implementation(const dom_parser_implementation &) = delete; - dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; - - simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) 
noexcept final; - simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; - simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; - simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; - inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; - inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; -private: - simdjson_really_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); - -}; - -} // namespace westmere -} // namespace simdjson - -namespace simdjson { -namespace westmere { - -inline dom_parser_implementation::dom_parser_implementation() noexcept = default; -inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; -inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; - -// Leaving these here so they can be inlined if so desired -inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { - if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } - // Stage 1 index output - size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; - structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); - if (!structural_indexes) { _capacity = 0; return MEMALLOC; } - structural_indexes[0] = 0; - n_structural_indexes = 0; - - _capacity = capacity; - return SUCCESS; -} - -inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { - // Stage 2 stacks - open_containers.reset(new (std::nothrow) open_container[max_depth]); - is_array.reset(new (std::nothrow) bool[max_depth]); - if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } - - _max_depth = max_depth; - return SUCCESS; -} - -} // namespace westmere -} // namespace simdjson 
-/* end file include/simdjson/generic/dom_parser_implementation.h */ -/* begin file include/simdjson/westmere/bitmanipulation.h */ -#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H -#define SIMDJSON_WESTMERE_BITMANIPULATION_H - -namespace simdjson { -namespace westmere { -namespace { - -// We sometimes call trailing_zero on inputs that are zero, -// but the algorithms do not end up using the returned value. -// Sadly, sanitizers are not smart enough to figure it out. -SIMDJSON_NO_SANITIZE_UNDEFINED -simdjson_really_inline int trailing_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long ret; - // Search the mask data from least significant bit (LSB) - // to the most significant bit (MSB) for a set bit (1). - _BitScanForward64(&ret, input_num); - return (int)ret; -#else // SIMDJSON_REGULAR_VISUAL_STUDIO - return __builtin_ctzll(input_num); -#endif // SIMDJSON_REGULAR_VISUAL_STUDIO -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline uint64_t clear_lowest_bit(uint64_t input_num) { - return input_num & (input_num-1); -} - -/* result might be undefined when input_num is zero */ -simdjson_really_inline int leading_zeroes(uint64_t input_num) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - unsigned long leading_zero = 0; - // Search the mask data from most significant bit (MSB) - // to least significant bit (LSB) for a set bit (1). 
- if (_BitScanReverse64(&leading_zero, input_num)) - return (int)(63 - leading_zero); - else - return 64; -#else - return __builtin_clzll(input_num); -#endif// SIMDJSON_REGULAR_VISUAL_STUDIO -} - -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO -simdjson_really_inline unsigned __int64 count_ones(uint64_t input_num) { - // note: we do not support legacy 32-bit Windows - return __popcnt64(input_num);// Visual Studio wants two underscores -} -#else -simdjson_really_inline long long int count_ones(uint64_t input_num) { - return _popcnt64(input_num); -} -#endif - -simdjson_really_inline bool add_overflow(uint64_t value1, uint64_t value2, - uint64_t *result) { -#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO - return _addcarry_u64(0, value1, value2, - reinterpret_cast(result)); -#else - return __builtin_uaddll_overflow(value1, value2, - reinterpret_cast(result)); -#endif -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H -/* end file include/simdjson/westmere/bitmanipulation.h */ -/* begin file include/simdjson/westmere/bitmask.h */ -#ifndef SIMDJSON_WESTMERE_BITMASK_H -#define SIMDJSON_WESTMERE_BITMASK_H - -namespace simdjson { -namespace westmere { -namespace { - -// -// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. -// -// For example, prefix_xor(00100100) == 00011100 -// -simdjson_really_inline uint64_t prefix_xor(const uint64_t bitmask) { - // There should be no such thing with a processing supporting avx2 - // but not clmul. 
- __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); - return _mm_cvtsi128_si64(result); -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_BITMASK_H -/* end file include/simdjson/westmere/bitmask.h */ -/* begin file include/simdjson/westmere/simd.h */ -#ifndef SIMDJSON_WESTMERE_SIMD_H -#define SIMDJSON_WESTMERE_SIMD_H - - -namespace simdjson { -namespace westmere { -namespace { -namespace simd { - - template - struct base { - __m128i value; - - // Zero constructor - simdjson_really_inline base() : value{__m128i()} {} - - // Conversion from SIMD register - simdjson_really_inline base(const __m128i _value) : value(_value) {} - - // Conversion to SIMD register - simdjson_really_inline operator const __m128i&() const { return this->value; } - simdjson_really_inline operator __m128i&() { return this->value; } - - // Bit operations - simdjson_really_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } - simdjson_really_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } - simdjson_really_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } - simdjson_really_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } - simdjson_really_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } - simdjson_really_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } - simdjson_really_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } - }; - - // Forward-declared so they can be used by splat and friends. 
- template - struct simd8; - - template> - struct base8: base> { - typedef uint16_t bitmask_t; - typedef uint32_t bitmask2_t; - - simdjson_really_inline base8() : base>() {} - simdjson_really_inline base8(const __m128i _value) : base>(_value) {} - - friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } - - static const int SIZE = sizeof(base>::value); - - template - simdjson_really_inline simd8 prev(const simd8 prev_chunk) const { - return _mm_alignr_epi8(*this, prev_chunk, 16 - N); - } - }; - - // SIMD byte mask type (returned by things like eq and gt) - template<> - struct simd8: base8 { - static simdjson_really_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } - - simdjson_really_inline simd8() : base8() {} - simdjson_really_inline simd8(const __m128i _value) : base8(_value) {} - // Splat constructor - simdjson_really_inline simd8(bool _value) : base8(splat(_value)) {} - - simdjson_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } - simdjson_really_inline bool any() const { return !_mm_testz_si128(*this, *this); } - simdjson_really_inline simd8 operator~() const { return *this ^ true; } - }; - - template - struct base8_numeric: base8 { - static simdjson_really_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } - static simdjson_really_inline simd8 zero() { return _mm_setzero_si128(); } - static simdjson_really_inline simd8 load(const T values[16]) { - return _mm_loadu_si128(reinterpret_cast(values)); - } - // Repeat 16 values as many times as necessary (usually for lookup tables) - static simdjson_really_inline simd8 repeat_16( - T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, - T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - simdjson_really_inline base8_numeric() : base8() {} - simdjson_really_inline base8_numeric(const __m128i 
_value) : base8(_value) {} - - // Store to array - simdjson_really_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } - - // Override to distinguish from bool version - simdjson_really_inline simd8 operator~() const { return *this ^ 0xFFu; } - - // Addition/subtraction are the same for signed and unsigned - simdjson_really_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } - simdjson_really_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } - simdjson_really_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } - simdjson_really_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } - - // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) - template - simdjson_really_inline simd8 lookup_16(simd8 lookup_table) const { - return _mm_shuffle_epi8(lookup_table, *this); - } - - // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). - // Passing a 0 value for mask would be equivalent to writing out every byte to output. - // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes - // get written. - // Design consideration: it seems like a function with the - // signature simd8 compress(uint32_t mask) would be - // sensible, but the AVX ISA makes this kind of approach difficult. 
- template - simdjson_really_inline void compress(uint16_t mask, L * output) const { - using internal::thintable_epi8; - using internal::BitsSetTable256mul2; - using internal::pshufb_combine_table; - // this particular implementation was inspired by work done by @animetosho - // we do it in two steps, first 8 bytes and then second 8 bytes - uint8_t mask1 = uint8_t(mask); // least significant 8 bits - uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits - // next line just loads the 64-bit values thintable_epi8[mask1] and - // thintable_epi8[mask2] into a 128-bit register, using only - // two instructions on most compilers. - __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); - // we increment by 0x08 the second half of the mask - shufmask = - _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); - // this is the version "nearly pruned" - __m128i pruned = _mm_shuffle_epi8(*this, shufmask); - // we still need to put the two halves together. - // we compute the popcount of the first half: - int pop1 = BitsSetTable256mul2[mask1]; - // then load the corresponding mask, what it does is to write - // only the first pop1 bytes from the first 8 bytes, and then - // it fills in with the bytes from the second 8 bytes + some filling - // at the end. 
- __m128i compactmask = - _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); - __m128i answer = _mm_shuffle_epi8(pruned, compactmask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); - } - - template - simdjson_really_inline simd8 lookup_16( - L replace0, L replace1, L replace2, L replace3, - L replace4, L replace5, L replace6, L replace7, - L replace8, L replace9, L replace10, L replace11, - L replace12, L replace13, L replace14, L replace15) const { - return lookup_16(simd8::repeat_16( - replace0, replace1, replace2, replace3, - replace4, replace5, replace6, replace7, - replace8, replace9, replace10, replace11, - replace12, replace13, replace14, replace15 - )); - } - }; - - // Signed bytes - template<> - struct simd8 : base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(int8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const int8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, - int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Order-sensitive comparisons - simdjson_really_inline simd8 max_val(const simd8 other) const { return 
_mm_max_epi8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } - simdjson_really_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } - }; - - // Unsigned bytes - template<> - struct simd8: base8_numeric { - simdjson_really_inline simd8() : base8_numeric() {} - simdjson_really_inline simd8(const __m128i _value) : base8_numeric(_value) {} - // Splat constructor - simdjson_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {} - // Array constructor - simdjson_really_inline simd8(const uint8_t* values) : simd8(load(values)) {} - // Member-by-member initialization - simdjson_really_inline simd8( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) : simd8(_mm_setr_epi8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - )) {} - // Repeat 16 values as many times as necessary (usually for lookup tables) - simdjson_really_inline static simd8 repeat_16( - uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, - uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 - ) { - return simd8( - v0, v1, v2, v3, v4, v5, v6, v7, - v8, v9, v10,v11,v12,v13,v14,v15 - ); - } - - // Saturated math - simdjson_really_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } - simdjson_really_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } - - // Order-specific operations - simdjson_really_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } - simdjson_really_inline simd8 min_val(const simd8 other) const { 
return _mm_min_epu8(*this, other); } - // Same as >, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } - // Same as <, but only guarantees true is nonzero (< guarantees true = -1) - simdjson_really_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } - simdjson_really_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } - simdjson_really_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } - simdjson_really_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - simdjson_really_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } - - // Bit-specific operations - simdjson_really_inline simd8 bits_not_set() const { return *this == uint8_t(0); } - simdjson_really_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } - simdjson_really_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } - simdjson_really_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } - simdjson_really_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } - simdjson_really_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } - simdjson_really_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } - simdjson_really_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } - simdjson_really_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } - template - simdjson_really_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } - template - simdjson_really_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } 
- // Get one of the bits and make a bitmask out of it. - // e.g. value.get_bit<7>() gets the high bit - template - simdjson_really_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } - }; - - template - struct simd8x64 { - static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); - static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); - const simd8 chunks[NUM_CHUNKS]; - - simd8x64(const simd8x64& o) = delete; // no copy allowed - simd8x64& operator=(const simd8& other) = delete; // no assignment allowed - simd8x64() = delete; // no default constructor allowed - - simdjson_really_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} - simdjson_really_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} - - simdjson_really_inline void store(T ptr[64]) const { - this->chunks[0].store(ptr+sizeof(simd8)*0); - this->chunks[1].store(ptr+sizeof(simd8)*1); - this->chunks[2].store(ptr+sizeof(simd8)*2); - this->chunks[3].store(ptr+sizeof(simd8)*3); - } - - simdjson_really_inline simd8 reduce_or() const { - return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); - } - - simdjson_really_inline uint64_t compress(uint64_t mask, T * output) const { - this->chunks[0].compress(uint16_t(mask), output); - this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); - this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); - this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); - return 64 - count_ones(mask); - } - - simdjson_really_inline uint64_t to_bitmask() const { - uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); - uint64_t r1 = this->chunks[1].to_bitmask() ; - uint64_t r2 = this->chunks[2].to_bitmask() ; - uint64_t 
r3 = this->chunks[3].to_bitmask() ; - return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); - } - - simdjson_really_inline uint64_t eq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] == mask, - this->chunks[1] == mask, - this->chunks[2] == mask, - this->chunks[3] == mask - ).to_bitmask(); - } - - simdjson_really_inline uint64_t eq(const simd8x64 &other) const { - return simd8x64( - this->chunks[0] == other.chunks[0], - this->chunks[1] == other.chunks[1], - this->chunks[2] == other.chunks[2], - this->chunks[3] == other.chunks[3] - ).to_bitmask(); - } - - simdjson_really_inline uint64_t lteq(const T m) const { - const simd8 mask = simd8::splat(m); - return simd8x64( - this->chunks[0] <= mask, - this->chunks[1] <= mask, - this->chunks[2] <= mask, - this->chunks[3] <= mask - ).to_bitmask(); - } - }; // struct simd8x64 - -} // namespace simd -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H -/* end file include/simdjson/westmere/simd.h */ -/* begin file include/simdjson/generic/jsoncharutils.h */ - -namespace simdjson { -namespace westmere { -namespace { -namespace jsoncharutils { - -// return non-zero if not a structural or whitespace char -// zero otherwise -simdjson_really_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace_negated[c]; -} - -simdjson_really_inline uint32_t is_structural_or_whitespace(uint8_t c) { - return internal::structural_or_whitespace[c]; -} - -// returns a value with the high 16 bits set if not valid -// otherwise returns the conversion of the 4 hex digits at src into the bottom -// 16 bits of the 32-bit return register -// -// see -// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ -static inline uint32_t hex_to_u32_nocheck( - const uint8_t *src) { // strictly speaking, static inline is a C-ism - uint32_t v1 = internal::digit_to_val32[630 + src[0]]; - 
uint32_t v2 = internal::digit_to_val32[420 + src[1]]; - uint32_t v3 = internal::digit_to_val32[210 + src[2]]; - uint32_t v4 = internal::digit_to_val32[0 + src[3]]; - return v1 | v2 | v3 | v4; -} - -// given a code point cp, writes to c -// the utf-8 code, outputting the length in -// bytes, if the length is zero, the code point -// is invalid -// -// This can possibly be made faster using pdep -// and clz and table lookups, but JSON documents -// have few escaped code points, and the following -// function looks cheap. -// -// Note: we assume that surrogates are treated separately -// -simdjson_really_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { - if (cp <= 0x7F) { - c[0] = uint8_t(cp); - return 1; // ascii - } - if (cp <= 0x7FF) { - c[0] = uint8_t((cp >> 6) + 192); - c[1] = uint8_t((cp & 63) + 128); - return 2; // universal plane - // Surrogates are treated elsewhere... - //} //else if (0xd800 <= cp && cp <= 0xdfff) { - // return 0; // surrogates // could put assert here - } else if (cp <= 0xFFFF) { - c[0] = uint8_t((cp >> 12) + 224); - c[1] = uint8_t(((cp >> 6) & 63) + 128); - c[2] = uint8_t((cp & 63) + 128); - return 3; - } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this - // is not needed - c[0] = uint8_t((cp >> 18) + 240); - c[1] = uint8_t(((cp >> 12) & 63) + 128); - c[2] = uint8_t(((cp >> 6) & 63) + 128); - c[3] = uint8_t((cp & 63) + 128); - return 4; - } - // will return 0 when the code point was too large. 
- return 0; // bad r -} - -#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm -// this is a slow emulation routine for 32-bit -// -static simdjson_really_inline uint64_t __emulu(uint32_t x, uint32_t y) { - return x * (uint64_t)y; -} -static simdjson_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { - uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); - uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); - uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); - uint64_t adbc_carry = !!(adbc < ad); - uint64_t lo = bd + (adbc << 32); - *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + - (adbc_carry << 32) + !!(lo < bd); - return lo; -} -#endif - -using internal::value128; - -simdjson_really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { - value128 answer; -#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) -#ifdef _M_ARM64 - // ARM64 has native support for 64-bit multiplications, no need to emultate - answer.high = __umulh(value1, value2); - answer.low = value1 * value2; -#else - answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 -#endif // _M_ARM64 -#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) - __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; - answer.low = uint64_t(r); - answer.high = uint64_t(r >> 64); -#endif - return answer; -} - -} // namespace jsoncharutils -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file include/simdjson/generic/jsoncharutils.h */ -/* begin file include/simdjson/generic/atomparsing.h */ -namespace simdjson { -namespace westmere { -namespace { -/// @private -namespace atomparsing { - -// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. 
-// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot -// be certain that the character pointer will be properly aligned. -// You might think that using memcpy makes this function expensive, but you'd be wrong. -// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); -// to the compile-time constant 1936482662. -simdjson_really_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } - - -// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. -// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. -simdjson_warn_unused -simdjson_really_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { - uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) - static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); - std::memcpy(&srcval, src, sizeof(uint32_t)); - return srcval ^ string_to_uint32(atom); -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src) { - return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_true_atom(src); } - else if (len == 4) { return !str4ncmp(src, "true"); } - else { return false; } -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src) { - return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { - if (len > 5) { return is_valid_false_atom(src); } - else if (len == 5) { return !str4ncmp(src+1, "alse"); } - else { return false; } -} - 
-simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src) { - return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; -} - -simdjson_warn_unused -simdjson_really_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { - if (len > 4) { return is_valid_null_atom(src); } - else if (len == 4) { return !str4ncmp(src, "null"); } - else { return false; } -} - -} // namespace atomparsing -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file include/simdjson/generic/atomparsing.h */ -/* begin file include/simdjson/westmere/stringparsing.h */ -#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H -#define SIMDJSON_WESTMERE_STRINGPARSING_H - -namespace simdjson { -namespace westmere { -namespace { - -using namespace simd; - -// Holds backslashes and quotes locations. -struct backslash_and_quote { -public: - static constexpr uint32_t BYTES_PROCESSED = 32; - simdjson_really_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); - - simdjson_really_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } - simdjson_really_inline bool has_backslash() { return bs_bits != 0; } - simdjson_really_inline int quote_index() { return trailing_zeroes(quote_bits); } - simdjson_really_inline int backslash_index() { return trailing_zeroes(bs_bits); } - - uint32_t bs_bits; - uint32_t quote_bits; -}; // struct backslash_and_quote - -simdjson_really_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { - // this can read up to 31 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); - simd8 v0(src); - simd8 v1(src + 16); - v0.store(dst); - v1.store(dst + 16); - uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == 
'"').to_bitmask(); - return { - uint32_t(bs_and_quote), // bs_bits - uint32_t(bs_and_quote >> 32) // quote_bits - }; -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -/* begin file include/simdjson/generic/stringparsing.h */ -// This file contains the common code every implementation uses -// It is intended to be included multiple times and compiled multiple times - -namespace simdjson { -namespace westmere { -namespace { -/// @private -namespace stringparsing { - -// begin copypasta -// These chars yield themselves: " \ / -// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab -// u not handled in this table as it's complex -static const uint8_t escape_map[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. - 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. - 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
- - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -// handle a unicode codepoint -// write appropriate values into dest -// src will advance 6 bytes or 12 bytes -// dest will advance a variable amount (return via pointer) -// return true if the unicode codepoint was valid -// We work in little-endian then swap at write time -simdjson_warn_unused -simdjson_really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, - uint8_t **dst_ptr) { - // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the - // conversion isn't valid; we defer the check for this to inside the - // multilingual plane check - uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - *src_ptr += 6; - // check for low surrogate for characters outside the Basic - // Multilingual Plane. - if (code_point >= 0xd800 && code_point < 0xdc00) { - if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') { - return false; - } - uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); - - // if the first code point is invalid we will get here, as we will go past - // the check for being outside the Basic Multilingual plane. If we don't - // find a \u immediately afterwards we fail out anyhow, but if we do, - // this check catches both the case of the first code point being invalid - // or the second code point being invalid. 
- if ((code_point | code_point_2) >> 16) { - return false; - } - - code_point = - (((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000; - *src_ptr += 6; - } - size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); - *dst_ptr += offset; - return offset > 0; -} - -/** - * Unescape a string from src to dst, stopping at a final unescaped quote. E.g., if src points at 'joe"', then - * dst needs to have four free bytes. - */ -simdjson_warn_unused simdjson_really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { - while (1) { - // Copy the next n bytes, and find the backslash and quote in them. - auto bs_quote = backslash_and_quote::copy_and_find(src, dst); - // If the next thing is the end quote, copy and return - if (bs_quote.has_quote_first()) { - // we encountered quotes first. Move dst to point to quotes and exit - return dst + bs_quote.quote_index(); - } - if (bs_quote.has_backslash()) { - /* find out where the backspace is */ - auto bs_dist = bs_quote.backslash_index(); - uint8_t escape_char = src[bs_dist + 1]; - /* we encountered backslash first. Handle backslash */ - if (escape_char == 'u') { - /* move src/dst up to the start; they will be further adjusted - within the unicode codepoint handling code. */ - src += bs_dist; - dst += bs_dist; - if (!handle_unicode_codepoint(&src, &dst)) { - return nullptr; - } - } else { - /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and - * write bs_dist+1 characters to output - * note this may reach beyond the part of the buffer we've actually - * seen. I think this is ok */ - uint8_t escape_result = escape_map[escape_char]; - if (escape_result == 0u) { - return nullptr; /* bogus escape value is an error */ - } - dst[bs_dist] = escape_result; - src += bs_dist + 2; - dst += bs_dist + 1; - } - } else { - /* they are the same. Since they can't co-occur, it means we - * encountered neither. 
*/ - src += backslash_and_quote::BYTES_PROCESSED; - dst += backslash_and_quote::BYTES_PROCESSED; - } - } - /* can't be reached */ - return nullptr; -} - -simdjson_unused simdjson_warn_unused simdjson_really_inline error_code parse_string_to_buffer(const uint8_t *src, uint8_t *¤t_string_buf_loc, std::string_view &s) { - if (*(src++) != '"') { return STRING_ERROR; } - auto end = stringparsing::parse_string(src, current_string_buf_loc); - if (!end) { return STRING_ERROR; } - s = std::string_view(reinterpret_cast(current_string_buf_loc), end-current_string_buf_loc); - current_string_buf_loc = end; - return SUCCESS; -} - -} // namespace stringparsing -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file include/simdjson/generic/stringparsing.h */ - -#endif // SIMDJSON_WESTMERE_STRINGPARSING_H -/* end file include/simdjson/westmere/stringparsing.h */ -/* begin file include/simdjson/westmere/numberparsing.h */ -#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H -#define SIMDJSON_WESTMERE_NUMBERPARSING_H - -namespace simdjson { -namespace westmere { -namespace { - -static simdjson_really_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { - // this actually computes *16* values so we are being wasteful. 
- const __m128i ascii0 = _mm_set1_epi8('0'); - const __m128i mul_1_10 = - _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); - const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); - const __m128i mul_1_10000 = - _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); - const __m128i input = _mm_sub_epi8( - _mm_loadu_si128(reinterpret_cast(chars)), ascii0); - const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); - const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); - const __m128i t3 = _mm_packus_epi32(t2, t2); - const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); - return _mm_cvtsi128_si32( - t4); // only captures the sum of the first 8 digits, drop the rest -} - -} // unnamed namespace -} // namespace westmere -} // namespace simdjson - -#define SIMDJSON_SWAR_NUMBER_PARSING 1 - -/* begin file include/simdjson/generic/numberparsing.h */ -#include - -namespace simdjson { -namespace westmere { - -namespace ondemand { -/** - * The type of a JSON number - */ -enum class number_type { - floating_point_number=1, /// a binary64 number - signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - unsigned_integer /// a positive integer larger or equal to 1<<63 -}; -} - -namespace { -/// @private -namespace numberparsing { - - - -#ifdef JSON_TEST_NUMBERS -#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) -#else -#define INVALID_NUMBER(SRC) (NUMBER_ERROR) -#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) -#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) -#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) -#endif - -namespace { -// Convert a mantissa, an exponent and a sign bit into an ieee64 double. -// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). -// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. -simdjson_really_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { - double d; - mantissa &= ~(1ULL << 52); - mantissa |= real_exponent << 52; - mantissa |= ((static_cast(negative)) << 63); - std::memcpy(&d, &mantissa, sizeof(d)); - return d; -} -} -// Attempts to compute i * 10^(power) exactly; and if "negative" is -// true, negate the result. -// This function will only work in some cases, when it does not work, success is -// set to false. This should work *most of the time* (like 99% of the time). -// We assume that power is in the [smallest_power, -// largest_power] interval: the caller is responsible for this check. -simdjson_really_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { - // we start with a fast path - // It was described in - // Clinger WD. How to read floating point numbers accurately. - // ACM SIGPLAN Notices. 1990 -#ifndef FLT_EVAL_METHOD -#error "FLT_EVAL_METHOD should be defined, please include cfloat." -#endif -#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) - // We cannot be certain that x/y is rounded to nearest. - if (0 <= power && power <= 22 && i <= 9007199254740991) { -#else - if (-22 <= power && power <= 22 && i <= 9007199254740991) { -#endif - // convert the integer into a double. This is lossless since - // 0 <= i <= 2^53 - 1. - d = double(i); - // - // The general idea is as follows. - // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then - // 1) Both s and p can be represented exactly as 64-bit floating-point - // values - // (binary64). 
- // 2) Because s and p can be represented exactly as floating-point values, - // then s * p - // and s / p will produce correctly rounded values. - // - if (power < 0) { - d = d / simdjson::internal::power_of_ten[-power]; - } else { - d = d * simdjson::internal::power_of_ten[power]; - } - if (negative) { - d = -d; - } - return true; - } - // When 22 < power && power < 22 + 16, we could - // hope for another, secondary fast path. It was - // described by David M. Gay in "Correctly rounded - // binary-decimal and decimal-binary conversions." (1990) - // If you need to compute i * 10^(22 + x) for x < 16, - // first compute i * 10^x, if you know that result is exact - // (e.g., when i * 10^x < 2^53), - // then you can still proceed and do (i * 10^x) * 10^22. - // Is this worth your time? - // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) - // for this second fast path to work. - // If you you have 22 < power *and* power < 22 + 16, and then you - // optimistically compute "i * 10^(x-22)", there is still a chance that you - // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of - // this optimization maybe less common than we would like. Source: - // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ - // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html - - // The fast path has now failed, so we are failing back on the slower path. - - // In the slow path, we need to adjust i so that it is > 1<<63 which is always - // possible, except if i == 0, so we handle i == 0 separately. - if(i == 0) { - d = 0.0; - return true; - } - - - // The exponent is 1024 + 63 + power - // + floor(log(5**power)/log(2)). - // The 1024 comes from the ieee64 standard. - // The 63 comes from the fact that we use a 64-bit word. - // - // Computing floor(log(5**power)/log(2)) could be - // slow. Instead we use a fast function. 
- // - // For power in (-400,350), we have that - // (((152170 + 65536) * power ) >> 16); - // is equal to - // floor(log(5**power)/log(2)) + power when power >= 0 - // and it is equal to - // ceil(log(5**-power)/log(2)) + power when power < 0 - // - // The 65536 is (1<<16) and corresponds to - // (65536 * power) >> 16 ---> power - // - // ((152170 * power ) >> 16) is equal to - // floor(log(5**power)/log(2)) - // - // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). - // The 1<<16 value is a power of two; we could use a - // larger power of 2 if we wanted to. - // - int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; - - - // We want the most significant bit of i to be 1. Shift if needed. - int lz = leading_zeroes(i); - i <<= lz; - - - // We are going to need to do some 64-bit arithmetic to get a precise product. - // We use a table lookup approach. - // It is safe because - // power >= smallest_power - // and power <= largest_power - // We recover the mantissa of the power, it has a leading 1. It is always - // rounded down. - // - // We want the most significant 64 bits of the product. We know - // this will be non-zero because the most significant bit of i is - // 1. - const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); - // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) - // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. 
- simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); - // Both i and power_of_five_128[index] have their most significant bit set to 1 which - // implies that the either the most or the second most significant bit of the product - // is 1. We pack values in this manner for efficiency reasons: it maximizes the use - // we make of the product. It also makes it easy to reason about the product: there - // is 0 or 1 leading zero in the product. - - // Unless the least significant 9 bits of the high (64-bit) part of the full - // product are all 1s, then we know that the most significant 55 bits are - // exact and no further work is needed. Having 55 bits is necessary because - // we need 53 bits for the mantissa but we have to have one rounding bit and - // we can waste a bit if the most significant bit of the product is zero. - if((firstproduct.high & 0x1FF) == 0x1FF) { - // We want to compute i * 5^q, but only care about the top 55 bits at most. - // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing - // the full computation is wasteful. So we do what is called a "truncated - // multiplication". - // We take the most significant 64-bits, and we put them in - // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q - // to the desired approximation using one multiplication. Sometimes it does not suffice. - // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and - // then we get a better approximation to i * 5^q. In very rare cases, even that - // will not suffice, though it is seemingly very hard to find such a scenario. - // - // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat - // more complicated. - // - // There is an extra layer of complexity in that we need more than 55 bits of - // accuracy in the round-to-even scenario. 
- // - // The full_multiplication function computes the 128-bit product of two 64-bit words - // with a returned value of type value128 with a "low component" corresponding to the - // 64-bit least significant bits of the product and with a "high component" corresponding - // to the 64-bit most significant bits of the product. - simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); - firstproduct.low += secondproduct.high; - if(secondproduct.high > firstproduct.low) { firstproduct.high++; } - // At this point, we might need to add at most one to firstproduct, but this - // can only change the value of firstproduct.high if firstproduct.low is maximal. - if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { - // This is very unlikely, but if so, we need to do much more work! - return false; - } - } - uint64_t lower = firstproduct.low; - uint64_t upper = firstproduct.high; - // The final mantissa should be 53 bits with a leading 1. - // We shift it so that it occupies 54 bits with a leading 1. - /////// - uint64_t upperbit = upper >> 63; - uint64_t mantissa = upper >> (upperbit + 9); - lz += int(1 ^ upperbit); - - // Here we have mantissa < (1<<54). - int64_t real_exponent = exponent - lz; - if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? - // Here have that real_exponent <= 0 so -real_exponent >= 0 - if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. - d = 0.0; - return true; - } - // next line is safe because -real_exponent + 1 < 0 - mantissa >>= -real_exponent + 1; - // Thankfully, we can't have both "round-to-even" and subnormals because - // "round-to-even" only occurs for powers close to 0. - mantissa += (mantissa & 1); // round up - mantissa >>= 1; - // There is a weird scenario where we don't have a subnormal but just. 
- // Suppose we start with 2.2250738585072013e-308, we end up - // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal - // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round - // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer - // subnormal, but we can only know this after rounding. - // So we only declare a subnormal if we are smaller than the threshold. - real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; - d = to_double(mantissa, real_exponent, negative); - return true; - } - // We have to round to even. The "to even" part - // is only a problem when we are right in between two floats - // which we guard against. - // If we have lots of trailing zeros, we may fall right between two - // floating-point values. - // - // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] - // times a power of two. That is, it is right between a number with binary significand - // m and another number with binary significand m+1; and it must be the case - // that it cannot be represented by a float itself. - // - // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. - // Recall that 10^q = 5^q * 2^q. - // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that - // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. - // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so - // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have - // 2^{53} x 5^{-q} < 2^{64}. - // Hence we have 5^{-q} < 2^{11}$ or q>= -4. - // - // We require lower <= 1 and not lower == 0 because we could not prove that - // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
- if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { - if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { - mantissa &= ~1; // flip it so that we do not round up - } - } - - mantissa += mantissa & 1; - mantissa >>= 1; - - // Here we have mantissa < (1<<53), unless there was an overflow - if (mantissa >= (1ULL << 53)) { - ////////// - // This will happen when parsing values such as 7.2057594037927933e+16 - //////// - mantissa = (1ULL << 52); - real_exponent++; - } - mantissa &= ~(1ULL << 52); - // we have to check that real_exponent is in range, otherwise we bail out - if (simdjson_unlikely(real_exponent > 2046)) { - // We have an infinite value!!! We could actually throw an error here if we could. - return false; - } - d = to_double(mantissa, real_exponent, negative); - return true; -} - -// We call a fallback floating-point parser that might be slow. Note -// it will accept JSON numbers, but the JSON spec. is more restrictive so -// before you call parse_float_fallback, you need to have validated the input -// string with the JSON grammar. -// It will return an error (false) if the parsed number is infinite. -// The string parsing itself always succeeds. We know that there is at least -// one digit. -static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). 
- return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} -static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { - *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); - // We do not accept infinite values. - - // Detecting finite values in a portable manner is ridiculously hard, ideally - // we would want to do: - // return !std::isfinite(*outDouble); - // but that mysteriously fails under legacy/old libc++ libraries, see - // https://github.com/simdjson/simdjson/issues/1286 - // - // Therefore, fall back to this solution (the extra parens are there - // to handle that max may be a macro on windows). - return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); -} - -// check quickly whether the next 8 chars are made of digits -// at a glance, it looks better than Mula's -// http://0x80.pl/articles/swar-digits-validate.html -simdjson_really_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { - uint64_t val; - // this can read up to 7 bytes beyond the buffer size, but we require - // SIMDJSON_PADDING of padding - static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); - std::memcpy(&val, chars, 8); - // a branchy method might be faster: - // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) - // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == - // 0x3030303030303030); - return (((val & 0xF0F0F0F0F0F0F0F0) | - (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == - 0x3333333333333333); -} - -template -error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { - double d; - if (parse_float_fallback(src, &d)) { - writer.append_double(d); - return SUCCESS; - } - return INVALID_NUMBER(src); -} - -template -SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later -simdjson_really_inline bool 
parse_digit(const uint8_t c, I &i) { - const uint8_t digit = static_cast(c - '0'); - if (digit > 9) { - return false; - } - // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - return true; -} - -simdjson_really_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { - // we continue with the fiction that we have an integer. If the - // floating point number is representable as x * 10^z for some integer - // z that fits in 53 bits, then we will be able to convert back the - // the integer into a float in a lossless manner. - const uint8_t *const first_after_period = p; - -#ifdef SIMDJSON_SWAR_NUMBER_PARSING -#if SIMDJSON_SWAR_NUMBER_PARSING - // this helps if we have lots of decimals! - // this turns out to be frequent enough. - if (is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); - p += 8; - } -#endif // SIMDJSON_SWAR_NUMBER_PARSING -#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING - // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) - if (parse_digit(*p, i)) { ++p; } - while (parse_digit(*p, i)) { p++; } - exponent = first_after_period - p; - // Decimal without digits (123.) is illegal - if (exponent == 0) { - return INVALID_NUMBER(src); - } - return SUCCESS; -} - -simdjson_really_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { - // Exp Sign: -123.456e[-]78 - bool neg_exp = ('-' == *p); - if (neg_exp || '+' == *p) { p++; } // Skip + as well - - // Exponent: -123.456e-[78] - auto start_exp = p; - int64_t exp_number = 0; - while (parse_digit(*p, exp_number)) { ++p; } - // It is possible for parse_digit to overflow. - // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
- // Thus we *must* check for possible overflow before we negate exp_number. - - // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into - // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may - // not oblige and may, in fact, generate two distinct paths in any case. It might be - // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off - // instructions for a simdjson_likely branch, an unconclusive gain. - - // If there were no digits, it's an error. - if (simdjson_unlikely(p == start_exp)) { - return INVALID_NUMBER(src); - } - // We have a valid positive exponent in exp_number at this point, except that - // it may have overflowed. - - // If there were more than 18 digits, we may have overflowed the integer. We have to do - // something!!!! - if (simdjson_unlikely(p > start_exp+18)) { - // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow - while (*start_exp == '0') { start_exp++; } - // 19 digits could overflow int64_t and is kind of absurd anyway. We don't - // support exponents smaller than -999,999,999,999,999,999 and bigger - // than 999,999,999,999,999,999. - // We can truncate. - // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before - // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could - // truncate at 324. - // Note that there is no reason to fail per se at this point in time. - // E.g., 0e999999999999999999999 is a fine number. - if (p > start_exp+18) { exp_number = 999999999999999999; } - } - // At this point, we know that exp_number is a sane, positive, signed integer. - // It is <= 999,999,999,999,999,999. As long as 'exponent' is in - // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' - // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
- // To sum it up: the next line should never overflow. - exponent += (neg_exp ? -exp_number : exp_number); - return SUCCESS; -} - -simdjson_really_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const uint8_t *start = start_digits; - while ((*start == '0') || (*start == '.')) { ++start; } - // we over-decrement by one when there is a '.' - return digit_count - size_t(start - start_digits); -} - -template -simdjson_really_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon in practice. - // - // 9999999999999999999 < 2**64 so we can accommodate 19 digits. - // If we have a decimal separator, then digit_count - 1 is the number of digits, but we - // may not have a decimal separator! - if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { - // Ok, chances are good that we had an overflow! - // this is almost never going to get called!!! - // we start anew, going slowly!!! - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens - // because slow_float_parsing is a non-inlined function. If we passed our writer reference to - // it, it would force it to be stored in memory, preventing the compiler from picking it apart - // and putting into registers. i.e. if we pass it as reference, it gets slow. - // This is what forces the skip_double, as well. 
- error_code error = slow_float_parsing(src, writer); - writer.skip_double(); - return error; - } - // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other - // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 - // To future reader: we'd love if someone found a better way, or at least could explain this result! - if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { - // - // Important: smallest_power is such that it leads to a zero value. - // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero - // so something x 10^-343 goes to zero, but not so with something x 10^-342. - static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); - // - if((exponent < simdjson::internal::smallest_power) || (i == 0)) { - WRITE_DOUBLE(0, src, writer); - return SUCCESS; - } else { // (exponent > largest_power) and (i != 0) - // We have, for sure, an infinite value and simdjson refuses to parse infinite values. - return INVALID_NUMBER(src); - } - } - double d; - if (!compute_float_64(exponent, i, negative, d)) { - // we are almost never going to get here. 
- if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } - } - WRITE_DOUBLE(d, src, writer); - return SUCCESS; -} - -// for performance analysis, it is sometimes useful to skip parsing -#ifdef SIMDJSON_SKIPNUMBERPARSING - -template -simdjson_really_inline error_code parse_number(const uint8_t *const, W &writer) { - writer.append_s64(0); // always write zero - return SUCCESS; // always succeeds -} - -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } -#else - -// parse the number at src -// define JSON_TEST_NUMBERS for unit testing -// -// It is assumed that the number is followed by a structural ({,},],[) character -// or a white space character. If that is not the case (e.g., when the JSON -// document is made of a single number), then it is necessary to copy the -// content and append a space before calling this function. -// -// Our objective is accurate parsing (ULP of 0) at high speed. 
-template -simdjson_really_inline error_code parse_number(const uint8_t *const src, W &writer) { - - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } - - // - // Handle floats if there is a . or e (or both) - // - int64_t exponent = 0; - bool is_float = false; - if ('.' == *p) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); - digit_count = int(p - start_digits); // used later to guard against overflows - } - if (('e' == *p) || ('E' == *p)) { - is_float = true; - ++p; - SIMDJSON_TRY( parse_exponent(src, p, exponent) ); - } - if (is_float) { - const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); - SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); - if (dirty_end) { return INVALID_NUMBER(src); } - return SUCCESS; - } - - // The longest negative 64-bit number is 19 digits. - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - size_t longest_digit_count = negative ? 
19 : 20; - if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } - if (digit_count == longest_digit_count) { - if (negative) { - // Anything negative above INT64_MAX+1 is invalid - if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } - WRITE_INTEGER(~i+1, src, writer); - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } - } - - // Write unsigned if it doesn't fit in a signed integer. - if (i > uint64_t(INT64_MAX)) { - WRITE_UNSIGNED(i, src, writer); - } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); - } - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } - return SUCCESS; -} - -// Inlineable functions -namespace { - -// This table can be used to characterize the final character of an integer -// string. For JSON structural character and allowable white space characters, -// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise -// we return NUMBER_ERROR. -// Optimization note: we could easily reduce the size of the table by half (to 128) -// at the cost of an extra branch. 
-// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): -static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); -static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); - -const uint8_t integer_string_finisher[256] = { - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, - SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, - NUMBER_ERROR}; - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. 
- // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - - -// Parse any number from 0 to 18,446,744,073,709,551,615 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { - const uint8_t *p = src; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from 0 to 18,446,744,073,709,551,615 -simdjson_unused simdjson_really_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { - const uint8_t *p = src + 1; - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // The longest positive 64-bit number is 20 digits. - // We do it this way so we don't trigger this branch unless we must. - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > 20)) - // into a single branch since digit_count is unsigned. 
- if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if (*p != '"') { return NUMBER_ERROR; } - - if (digit_count == 20) { - // Positive overflow check: - // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the - // biggest uint64_t. - // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. - // If we got here, it's a 20 digit number starting with the digit "1". - // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller - // than 1,553,255,926,290,448,384. - // - That is smaller than the smallest possible 20-digit number the user could write: - // 10,000,000,000,000,000,000. - // - Therefore, if the number is positive and lower than that, it's overflow. - // - The value we are looking at is less than or equal to INT64_MAX. - // - // Note: we use src[1] and not src[0] because src[0] is the quote character in this - // instance. - if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } - } - - return i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. 
- // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. - // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? 
(~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { - // - // Check for minus sign - // - if(src == src_end) { return NUMBER_ERROR; } - bool negative = (*src == '-'); - const uint8_t *p = src + negative; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 -simdjson_unused simdjson_really_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - const uint8_t *p = src + negative + 1; - - // - // Parse the integer part. - // - // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare - const uint8_t *const start_digits = p; - uint64_t i = 0; - while (parse_digit(*p, i)) { p++; } - - // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. - // Optimization note: size_t is expected to be unsigned. - size_t digit_count = size_t(p - start_digits); - // We go from - // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 - // so we can never represent numbers that have more than 19 digits. - size_t longest_digit_count = 19; - // Optimization note: the compiler can probably merge - // ((digit_count == 0) || (digit_count > longest_digit_count)) - // into a single branch since digit_count is unsigned. - if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } - // Here digit_count > 0. - if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } - // We can do the following... - // if (!jsoncharutils::is_structural_or_whitespace(*p)) { - // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; - // } - // as a single table lookup: - if(*p != '"') { return NUMBER_ERROR; } - // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
- // Performance note: This check is only needed when digit_count == longest_digit_count but it is - // so cheap that we might as well always make it. - if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } - return negative ? (~i+1) : i; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline bool is_negative(const uint8_t * src) noexcept { - return (*src == '-'); -} - -simdjson_unused simdjson_really_inline simdjson_result is_integer(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } - return false; -} - -simdjson_unused simdjson_really_inline simdjson_result get_number_type(const uint8_t * src) noexcept { - bool negative = (*src == '-'); - src += negative; - const uint8_t *p = src; - while(static_cast(*p - '0') <= 9) { p++; } - if ( p == src ) { return NUMBER_ERROR; } - if (jsoncharutils::is_structural_or_whitespace(*p)) { - // We have an integer. - // If the number is negative and valid, it must be a signed integer. - if(negative) { return ondemand::number_type::signed_integer; } - // We want values larger or equal to 9223372036854775808 to be unsigned - // integers, and the other values to be signed integers. - int digit_count = int(p - src); - if(digit_count >= 19) { - const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); - if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { - return ondemand::number_type::unsigned_integer; - } - } - return ondemand::number_type::signed_integer; - } - // Hopefully, we have 'e' or 'E' or '.'. 
- return ondemand::number_type::floating_point_number; -} - -// Never read at src_end or beyond -simdjson_unused simdjson_really_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { - if(src == src_end) { return NUMBER_ERROR; } - // - // Check for minus sign - // - bool negative = (*src == '-'); - src += negative; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - if(p == src_end) { return NUMBER_ERROR; } - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while ((p != src_end) && parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely((p != src_end) && (*p == '.'))) { - p++; - const uint8_t *start_decimal_digits = p; - if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while ((p != src_end) && parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. - overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if ((p != src_end) && (*p == 'e' || *p == 'E')) { - p++; - if(p == src_end) { return NUMBER_ERROR; } - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while ((p != src_end) && parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 
0-exp : exp; - } - - if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, src_end, &d)) { - return NUMBER_ERROR; - } - return d; -} - -simdjson_unused simdjson_really_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { - // - // Check for minus sign - // - bool negative = (*(src + 1) == '-'); - src += negative + 1; - - // - // Parse the integer part. - // - uint64_t i = 0; - const uint8_t *p = src; - p += parse_digit(*p, i); - bool leading_zero = (i == 0); - while (parse_digit(*p, i)) { p++; } - // no integer digits, or 0123 (zero must be solo) - if ( p == src ) { return INCORRECT_TYPE; } - if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } - - // - // Parse the decimal part. - // - int64_t exponent = 0; - bool overflow; - if (simdjson_likely(*p == '.')) { - p++; - const uint8_t *start_decimal_digits = p; - if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits - p++; - while (parse_digit(*p, i)) { p++; } - exponent = -(p - start_decimal_digits); - - // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
- overflow = p-src-1 > 19; - if (simdjson_unlikely(overflow && leading_zero)) { - // Skip leading 0.00000 and see if it still overflows - const uint8_t *start_digits = src + 2; - while (*start_digits == '0') { start_digits++; } - overflow = start_digits-src > 19; - } - } else { - overflow = p-src > 19; - } - - // - // Parse the exponent - // - if (*p == 'e' || *p == 'E') { - p++; - bool exp_neg = *p == '-'; - p += exp_neg || *p == '+'; - - uint64_t exp = 0; - const uint8_t *start_exp_digits = p; - while (parse_digit(*p, exp)) { p++; } - // no exp digits, or 20+ exp digits - if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } - - exponent += exp_neg ? 0-exp : exp; - } - - if (*p != '"') { return NUMBER_ERROR; } - - overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; - - // - // Assemble (or slow-parse) the float - // - double d; - if (simdjson_likely(!overflow)) { - if (compute_float_64(exponent, i, negative, d)) { return d; } - } - if (!parse_float_fallback(src-negative, &d)) { - return NUMBER_ERROR; - } - return d; -} -} //namespace {} -#endif // SIMDJSON_SKIPNUMBERPARSING - -} // namespace numberparsing -} // unnamed namespace -} // namespace westmere -} // namespace simdjson -/* end file include/simdjson/generic/numberparsing.h */ - -#endif // SIMDJSON_WESTMERE_NUMBERPARSING_H -/* end file include/simdjson/westmere/numberparsing.h */ -/* begin file include/simdjson/westmere/end.h */ -SIMDJSON_UNTARGET_WESTMERE -/* end file include/simdjson/westmere/end.h */ - -#endif // SIMDJSON_IMPLEMENTATION_WESTMERE -#endif // SIMDJSON_WESTMERE_COMMON_H -/* end file include/simdjson/westmere.h */ - -// Builtin implementation - -SIMDJSON_POP_DISABLE_WARNINGS - -#endif // SIMDJSON_IMPLEMENTATIONS_H -/* end file include/simdjson/implementations.h */ - -// Determine the best builtin implementation -#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION -#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE -#define 
SIMDJSON_BUILTIN_IMPLEMENTATION icelake -#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL -#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell -#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE -#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere -#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 -#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 -#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 -#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 -#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK -#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback -#else -#error "All possible implementations (including fallback) have been disabled! simdjson will not run." -#endif -#endif // SIMDJSON_BUILTIN_IMPLEMENTATION - -// redefining SIMDJSON_IMPLEMENTATION to "SIMDJSON_BUILTIN_IMPLEMENTATION" -// #define SIMDJSON_IMPLEMENTATION SIMDJSON_BUILTIN_IMPLEMENTATION - -// ondemand is only compiled as part of the builtin implementation at present - -// Interface declarations -/* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { - -// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair -// so we can avoid inlining errors -// TODO reconcile these! -/** - * The result of a simdjson operation that could fail. - * - * Gives the option of reading error codes, or throwing an exception by casting to the desired result. - * - * This is a base class for implementations that want to add functions to the result type for - * chaining. 
- * - * Override like: - * - * struct simdjson_result : public internal::implementation_simdjson_result_base { - * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} - * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} - * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} - * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} - * // Your extra methods here - * } - * - * Then any method returning simdjson_result will be chainable with your methods. - */ -template -struct implementation_simdjson_result_base { - - /** - * Create a new empty result with error = UNINITIALIZED. - */ - simdjson_really_inline implementation_simdjson_result_base() noexcept = default; - - /** - * Create a new error result. - */ - simdjson_really_inline implementation_simdjson_result_base(error_code error) noexcept; - - /** - * Create a new successful result. - */ - simdjson_really_inline implementation_simdjson_result_base(T &&value) noexcept; - - /** - * Create a new result with both things (use if you don't want to branch when creating the result). - */ - simdjson_really_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; - - /** - * Move the value and the error to the provided variables. - * - * @param value The variable to assign the value to. May not be set if there is an error. - * @param error The variable to assign the error to. Set to SUCCESS if there is no error. - */ - simdjson_really_inline void tie(T &value, error_code &error) && noexcept; - - /** - * Move the value to the provided variable. - * - * @param value The variable to assign the value to. May not be set if there is an error. - */ - simdjson_really_inline error_code get(T &value) && noexcept; - - /** - * The error. 
- */ - simdjson_really_inline error_code error() const noexcept; - -#if SIMDJSON_EXCEPTIONS - - /** - * Get the result value. - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T& value() & noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& value() && noexcept(false); - - /** - * Take the result value (move it). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline T&& take_value() && noexcept(false); - - /** - * Cast to the value (will throw on error). - * - * @throw simdjson_error if there was an error. - */ - simdjson_really_inline operator T&&() && noexcept(false); - - -#endif // SIMDJSON_EXCEPTIONS - - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline const T& value_unsafe() const& noexcept; - /** - * Get the result value. This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline T& value_unsafe() & noexcept; - /** - * Take the result value (move it). This function is safe if and only - * the error() method returns a value that evaluates to false. - */ - simdjson_really_inline T&& value_unsafe() && noexcept; -protected: - /** users should never directly access first and second. **/ - T first{}; /** Users should never directly access 'first'. **/ - error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ -}; // struct implementation_simdjson_result_base - -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/implementation_simdjson_result_base.h */ -/* begin file include/simdjson/generic/ondemand.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -/** - * A fast, simple, DOM-like interface that parses JSON as you use it. 
- * - * Designed for maximum speed and a lower memory profile. - */ -namespace ondemand { - -/** Represents the depth of a JSON value (number of nested arrays/objects). */ -using depth_t = int32_t; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -/* begin file include/simdjson/generic/ondemand/json_type.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -/** - * The type of a JSON value. - */ -enum class json_type { - // Start at 1 to catch uninitialized / default values more easily - array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) - object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) - number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) - string, ///< A JSON string ( "a" or "hello world\n" ...) - boolean, ///< A JSON boolean (true or false) - null ///< A JSON null (null) -}; - -class value_iterator; - -/** - * A type representing a JSON number. - * The design of the struct is deliberately straight-forward. All - * functions return standard values with no error check. - */ -struct number { - - /** - * return the automatically determined type of - * the number: number_type::floating_point_number, - * number_type::signed_integer or number_type::unsigned_integer. - * - * enum class number_type { - * floating_point_number=1, /// a binary64 number - * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement - * unsigned_integer /// a positive integer larger or equal to 1<<63 - * }; - */ - simdjson_really_inline number_type get_number_type() const noexcept; - /** - * return true if the automatically determined type of - * the number is number_type::unsigned_integer. - */ - simdjson_really_inline bool is_uint64() const noexcept; - /** - * return the value as a uint64_t, only valid if is_uint64() is true. 
- */ - simdjson_really_inline uint64_t get_uint64() const noexcept; - simdjson_really_inline operator uint64_t() const noexcept; - - /** - * return true if the automatically determined type of - * the number is number_type::signed_integer. - */ - simdjson_really_inline bool is_int64() const noexcept; - /** - * return the value as a int64_t, only valid if is_int64() is true. - */ - simdjson_really_inline int64_t get_int64() const noexcept; - simdjson_really_inline operator int64_t() const noexcept; - - - /** - * return true if the automatically determined type of - * the number is number_type::floating_point_number. - */ - simdjson_really_inline bool is_double() const noexcept; - /** - * return the value as a double, only valid if is_double() is true. - */ - simdjson_really_inline double get_double() const noexcept; - simdjson_really_inline operator double() const noexcept; - - /** - * Convert the number to a double. Though it always succeed, the conversion - * may be lossy if the number cannot be represented exactly. - */ - simdjson_really_inline double as_double() const noexcept; - - -protected: - /** - * The next block of declaration is designed so that we can call the number parsing - * functions on a number type. They are protected and should never be used outside - * of the core simdjson library. - */ - friend class value_iterator; - template - friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); - template - friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); - template - friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); - /** Store a signed 64-bit value to the number. */ - simdjson_really_inline void append_s64(int64_t value) noexcept; - /** Store an unsigned 64-bit value to the number. 
*/ - simdjson_really_inline void append_u64(uint64_t value) noexcept; - /** Store a double value to the number. */ - simdjson_really_inline void append_double(double value) noexcept; - /** Specifies that the value is a double, but leave it undefined. */ - simdjson_really_inline void skip_double() noexcept; - /** - * End of friend declarations. - */ - - /** - * Our attributes are a union type (size = 64 bits) - * followed by a type indicator. - */ - union { - double floating_point_number; - int64_t signed_integer; - uint64_t unsigned_integer; - } payload{0}; - number_type type{number_type::signed_integer}; -}; - -/** - * Write the JSON type to the output stream - * - * @param out The output stream. - * @param type The json_type. - */ -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept; - -#if SIMDJSON_EXCEPTIONS -/** - * Send JSON type to an output stream. - * - * @param out The output stream. - * @param type The json_type. - * @throw simdjson_error if the result being printed has an error. If there is an error with the - * underlying output stream, that error will be propagated (simdjson_error will not be - * thrown). 
- */ -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); -#endif - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_type.h */ -/* begin file include/simdjson/generic/ondemand/token_position.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -/** @private Position in the JSON buffer indexes */ -using token_position = const uint32_t *; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_position.h */ -/* begin file include/simdjson/generic/ondemand/logger.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class json_iterator; -class value_iterator; - -namespace logger { - -#if SIMDJSON_VERBOSE_LOGGING - static constexpr const bool LOG_ENABLED = true; -#else - static constexpr const bool LOG_ENABLED = false; -#endif - -// We do not want these functions to be 'really inlined' since real inlining is -// for performance purposes and if you are using the loggers, you do not care about -// performance (or should not). 
-static inline void log_headers() noexcept; -static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; -static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; -static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; -static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; -static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; -static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; -static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; - -static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; -static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; -static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; -static inline void log_end_value(const value_iterator &iter, const 
char *type, int delta=-1, int depth_delta=0) noexcept; -static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; - -} // namespace logger -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/logger.h */ -/* begin file include/simdjson/generic/ondemand/raw_json_string.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class object; -class parser; -class json_iterator; - -/** - * A string escaped per JSON rules, terminated with quote ("). They are used to represent - * unescaped keys inside JSON documents. - * - * (In other words, a pointer to the beginning of a string, just after the start quote, inside a - * JSON file.) - * - * This class is deliberately simplistic and has little functionality. You can - * compare a raw_json_string instance with an unescaped C string, but - * that is pretty much all you can do. - * - * They originate typically from field instance which in turn represent key-value pairs from - * object instances. From a field instance, you get the raw_json_string instance by calling key(). - * You can, if you want a more usable string_view instance, call the unescaped_key() method - * on the field instance. - */ -class raw_json_string { -public: - /** - * Create a new invalid raw_json_string. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline raw_json_string() noexcept = default; - - /** - * Create a new invalid raw_json_string pointed at the given location in the JSON. - * - * The given location must be just *after* the beginning quote (") in the JSON file. - * - * It *must* be terminated by a ", and be a valid JSON string. 
- */ - simdjson_really_inline raw_json_string(const uint8_t * _buf) noexcept; - /** - * Get the raw pointer to the beginning of the string in the JSON (just after the "). - * - * It is possible for this function to return a null pointer if the instance - * has outlived its existence. - */ - simdjson_really_inline const char * raw() const noexcept; - - /** - * This compares the current instance to the std::string_view target: returns true if - * they are byte-by-byte equal (no escaping is done) on target.size() characters, - * and if the raw_json_string instance has a quote character at byte index target.size(). - * We never read more than length + 1 bytes in the raw_json_string instance. - * If length is smaller than target.size(), this will return false. - * - * The std::string_view instance may contain any characters. However, the caller - * is responsible for setting length so that length bytes may be read in the - * raw_json_string. - * - * Performance: the comparison may be done using memcmp which may be efficient - * for long strings. - */ - simdjson_really_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; - - /** - * This compares the current instance to the std::string_view target: returns true if - * they are byte-by-byte equal (no escaping is done). - * The std::string_view instance should not contain unescaped quote characters: - * the caller is responsible for this check. See is_free_from_unescaped_quote. - * - * Performance: the comparison is done byte-by-byte which might be inefficient for - * long strings. - * - * If target is a compile-time constant, and your compiler likes you, - * you should be able to do the following without performance penalty... 
- * - * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); - * s.unsafe_is_equal(target); - */ - simdjson_really_inline bool unsafe_is_equal(std::string_view target) const noexcept; - - /** - * This compares the current instance to the C string target: returns true if - * they are byte-by-byte equal (no escaping is done). - * The provided C string should not contain an unescaped quote character: - * the caller is responsible for this check. See is_free_from_unescaped_quote. - * - * If target is a compile-time constant, and your compiler likes you, - * you should be able to do the following without performance penalty... - * - * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); - * s.unsafe_is_equal(target); - */ - simdjson_really_inline bool unsafe_is_equal(const char* target) const noexcept; - - /** - * This compares the current instance to the std::string_view target: returns true if - * they are byte-by-byte equal (no escaping is done). - */ - simdjson_really_inline bool is_equal(std::string_view target) const noexcept; - - /** - * This compares the current instance to the C string target: returns true if - * they are byte-by-byte equal (no escaping is done). - */ - simdjson_really_inline bool is_equal(const char* target) const noexcept; - - /** - * Returns true if target is free from unescaped quote. If target is known at - * compile-time, we might expect the computation to happen at compile time with - * many compilers (not all!). - */ - static simdjson_really_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; - static simdjson_really_inline bool is_free_from_unescaped_quote(const char* target) noexcept; - -private: - - - /** - * This will set the inner pointer to zero, effectively making - * this instance unusable. - */ - simdjson_really_inline void consume() noexcept { buf = nullptr; } - - /** - * Checks whether the inner pointer is non-null and thus usable. 
- */ - simdjson_really_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } - - /** - * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * - * ## IMPORTANT: string_view lifetime - * - * The string_view is only valid as long as the bytes in dst. - * - * @param dst A pointer to a buffer at least large enough to write this string as well as a \0. - * dst will be updated to the next unused location (just after the \0 written out at - * the end of this string). - * @return A string_view pointing at the unescaped string in dst - * @error STRING_ERROR if escapes are incorrect. - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; - /** - * Unescape this JSON string, replacing \\ with \, \n with newline, etc. - * - * ## IMPORTANT: string_view lifetime - * - * The string_view is only valid until the next parse() call on the parser. - * - * @param iter A json_iterator, which contains a buffer where the string will be written. - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; - - const uint8_t * buf{}; - friend class object; - friend class field; - friend struct simdjson_result; -}; - -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; - -/** - * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible - * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
- */ -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; -simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; -simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; - - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private - - simdjson_really_inline simdjson_result raw() const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(uint8_t *&dst) const noexcept; - simdjson_really_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/raw_json_string.h */ -/* begin file include/simdjson/generic/ondemand/token_iterator.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -/** - * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) - * detected by stage 1. - * - * @private This is not intended for external use. - */ -class token_iterator { -public: - /** - * Create a new invalid token_iterator. - * - * Exists so you can declare a variable and later assign to it before use. 
- */ - simdjson_really_inline token_iterator() noexcept = default; - simdjson_really_inline token_iterator(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator &operator=(token_iterator &&other) noexcept = default; - simdjson_really_inline token_iterator(const token_iterator &other) noexcept = default; - simdjson_really_inline token_iterator &operator=(const token_iterator &other) noexcept = default; - - /** - * Advance to the next token (returning the current one). - */ - simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; - /** - * Reports the current offset in bytes from the start of the underlying buffer. - */ - simdjson_really_inline uint32_t current_offset() const noexcept; - /** - * Get the JSON text for a given token (relative). - * - * This is not null-terminated; it is a view into the JSON. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... - */ - simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; - /** - * Get the maximum length of the JSON text for a given token. - * - * The length will include any whitespace at the end of the token. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - */ - simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; - - /** - * Get the JSON text for a given token. - * - * This is not null-terminated; it is a view into the JSON. - * - * @param position The position of the token. - * - */ - simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; - /** - * Get the maximum length of the JSON text for a given token. - * - * The length will include any whitespace at the end of the token. 
- * - * @param position The position of the token. - */ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; - - /** - * Return the current index. - */ - simdjson_really_inline token_position position() const noexcept; - /** - * Reset to a previously saved index. - */ - simdjson_really_inline void set_position(token_position target_position) noexcept; - - // NOTE: we don't support a full C++ iterator interface, because we expect people to make - // different calls to advance the iterator based on *their own* state. - - simdjson_really_inline bool operator==(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator!=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator>=(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<(const token_iterator &other) const noexcept; - simdjson_really_inline bool operator<=(const token_iterator &other) const noexcept; - -protected: - simdjson_really_inline token_iterator(const uint8_t *buf, token_position position) noexcept; - - /** - * Get the index of the JSON text for a given token (relative). - * - * This is not null-terminated; it is a view into the JSON. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = current token, - * 1 = next token, -1 = prev token. - */ - simdjson_really_inline uint32_t peek_index(int32_t delta=0) const noexcept; - /** - * Get the index of the JSON text for a given token. - * - * This is not null-terminated; it is a view into the JSON. - * - * @param position The position of the token. 
- * - */ - simdjson_really_inline uint32_t peek_index(token_position position) const noexcept; - - const uint8_t *buf{}; - token_position _position{}; - - friend class json_iterator; - friend class value_iterator; - friend class object; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline ~simdjson_result() noexcept = default; ///< @private -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_iterator.h */ -/* begin file include/simdjson/generic/ondemand/json_iterator.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class document; -class document_stream; -class object; -class array; -class value; -class raw_json_string; -class parser; - -/** - * Iterates through JSON tokens, keeping track of depth and string buffer. - * - * @private This is not intended for external use. - */ -class json_iterator { -protected: - token_iterator token{}; - ondemand::parser *parser{}; - /** - * Next free location in the string buffer. - * - * Used by raw_json_string::unescape() to have a place to unescape strings to. 
- */ - uint8_t *_string_buf_loc{}; - /** - * JSON error, if there is one. - * - * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. - * - * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first - * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If - * this is not elided, we should make sure it's at least not using up a register. Failing that, - * we should store it in document so there's only one of them. - */ - error_code error{SUCCESS}; - /** - * Depth of the current token in the JSON. - * - * - 0 = finished with document - * - 1 = document root value (could be [ or {, not yet known) - * - 2 = , or } inside root array/object - * - 3 = key or value inside root array/object. - */ - depth_t _depth{}; - /** - * Beginning of the document indexes. - * Normally we have root == parser->implementation->structural_indexes.get() - * but this may differ, especially in streaming mode (where we have several - * documents); - */ - token_position _root{}; - /** - * Normally, a json_iterator operates over a single document, but in - * some cases, we may have a stream of documents. This attribute is meant - * as meta-data: the json_iterator works the same irrespective of the - * value of this attribute. - */ - bool _streaming{false}; - -public: - simdjson_really_inline json_iterator() noexcept = default; - simdjson_really_inline json_iterator(json_iterator &&other) noexcept; - simdjson_really_inline json_iterator &operator=(json_iterator &&other) noexcept; - simdjson_really_inline explicit json_iterator(const json_iterator &other) noexcept = default; - simdjson_really_inline json_iterator &operator=(const json_iterator &other) noexcept = default; - /** - * Skips a JSON value, whether it is a scalar, array or object. 
- */ - simdjson_warn_unused simdjson_really_inline error_code skip_child(depth_t parent_depth) noexcept; - - /** - * Tell whether the iterator is still at the start - */ - simdjson_really_inline bool at_root() const noexcept; - - /** - * Tell whether we should be expected to run in streaming - * mode (iterating over many documents). It is pure metadata - * that does not affect how the iterator works. It is used by - * start_root_array() and start_root_object(). - */ - simdjson_really_inline bool streaming() const noexcept; - - /** - * Get the root value iterator - */ - simdjson_really_inline token_position root_position() const noexcept; - /** - * Assert that we are at the document depth (== 1) - */ - simdjson_really_inline void assert_at_document_depth() const noexcept; - /** - * Assert that we are at the root of the document - */ - simdjson_really_inline void assert_at_root() const noexcept; - - /** - * Tell whether the iterator is at the EOF mark - */ - simdjson_really_inline bool at_end() const noexcept; - - /** - * Tell whether the iterator is live (has not been moved). - */ - simdjson_really_inline bool is_alive() const noexcept; - - /** - * Abandon this iterator, setting depth to 0 (as if the document is finished). - */ - simdjson_really_inline void abandon() noexcept; - - /** - * Advance the current token without modifying depth. - */ - simdjson_really_inline const uint8_t *return_current_and_advance() noexcept; - - /** - * Assert that there are at least the given number of tokens left. - * - * Has no effect in release builds. - */ - simdjson_really_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; - /** - * Assert that the given position addresses an actual token (is within bounds). - * - * Has no effect in release builds. - */ - simdjson_really_inline void assert_valid_position(token_position position) const noexcept; - /** - * Get the JSON text for a given token (relative). 
- * - * This is not null-terminated; it is a view into the JSON. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. - * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... - */ - simdjson_really_inline const uint8_t *peek(int32_t delta=0) const noexcept; - /** - * Get the maximum length of the JSON text for the current token (or relative). - * - * The length will include any whitespace at the end of the token. - * - * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. - */ - simdjson_really_inline uint32_t peek_length(int32_t delta=0) const noexcept; - /** - * Get a pointer to the current location in the input buffer. - * - * This is not null-terminated; it is a view into the JSON. - * - * You may be pointing outside of the input buffer: it is not generally - * safe to dereference this pointer. - */ - simdjson_really_inline const uint8_t *unsafe_pointer() const noexcept; - /** - * Get the JSON text for a given token. - * - * This is not null-terminated; it is a view into the JSON. - * - * @param position The position of the token to retrieve. - * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... - */ - simdjson_really_inline const uint8_t *peek(token_position position) const noexcept; - /** - * Get the maximum length of the JSON text for the current token (or relative). - * - * The length will include any whitespace at the end of the token. - * - * @param position The position of the token to retrieve. - */ - simdjson_really_inline uint32_t peek_length(token_position position) const noexcept; - /** - * Get the JSON text for the last token in the document. - * - * This is not null-terminated; it is a view into the JSON. 
- * - * TODO consider a string_view, assuming the length will get stripped out by the optimizer when - * it isn't used ... - */ - simdjson_really_inline const uint8_t *peek_last() const noexcept; - - /** - * Ascend one level. - * - * Validates that the depth - 1 == parent_depth. - * - * @param parent_depth the expected parent depth. - */ - simdjson_really_inline void ascend_to(depth_t parent_depth) noexcept; - - /** - * Descend one level. - * - * Validates that the new depth == child_depth. - * - * @param child_depth the expected child depth. - */ - simdjson_really_inline void descend_to(depth_t child_depth) noexcept; - simdjson_really_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; - - /** - * Get current depth. - */ - simdjson_really_inline depth_t depth() const noexcept; - - /** - * Get current (writeable) location in the string buffer. - */ - simdjson_really_inline uint8_t *&string_buf_loc() noexcept; - - /** - * Report an unrecoverable error, preventing further iteration. - * - * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. - * @param message An error message to report with the error. - */ - simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; - - /** - * Log error, but don't stop iteration. - * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. - * @param message An error message to report with the error. 
- */ - simdjson_really_inline error_code optional_error(error_code error, const char *message) noexcept; - - template simdjson_warn_unused simdjson_really_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; - - simdjson_really_inline token_position position() const noexcept; - simdjson_really_inline void reenter_child(token_position position, depth_t child_depth) noexcept; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - simdjson_really_inline token_position start_position(depth_t depth) const noexcept; - simdjson_really_inline void set_start_position(depth_t depth, token_position position) noexcept; -#endif - /* Useful for debugging and logging purposes. */ - inline std::string to_string() const noexcept; - - /** - * Returns the current location in the document if in bounds. - */ - inline simdjson_result current_location() noexcept; - - /** - * Updates this json iterator so that it is back at the beginning of the document, - * as if it had just been created. - */ - inline void rewind() noexcept; -protected: - simdjson_really_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; - /// The last token before the end - simdjson_really_inline token_position last_position() const noexcept; - /// The token *at* the end. This points at gibberish and should only be used for comparison. - simdjson_really_inline token_position end_position() const noexcept; - /// The end of the buffer. 
- simdjson_really_inline token_position end() const noexcept; - - friend class document; - friend class document_stream; - friend class object; - friend class array; - friend class value; - friend class raw_json_string; - friend class parser; - friend class value_iterator; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; - friend simdjson_really_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; -}; // json_iterator - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - - simdjson_really_inline simdjson_result() noexcept = default; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_iterator.h */ -/* begin file include/simdjson/generic/ondemand/value_iterator.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class document; -class object; -class array; -class value; -class raw_json_string; -class parser; - -/** - * Iterates through a single JSON value at a particular depth. - * - * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects - * the caller to call the right ones. - * - * @private This is not intended for external use. 
- */ -class value_iterator { -protected: - /** The underlying JSON iterator */ - json_iterator *_json_iter{}; - /** The depth of this value */ - depth_t _depth{}; - /** - * The starting token index for this value - */ - token_position _start_position{}; - -public: - simdjson_really_inline value_iterator() noexcept = default; - - /** - * Denote that we're starting a document. - */ - simdjson_really_inline void start_document() noexcept; - - /** - * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. - * - * Optimized for scalars. - */ - simdjson_warn_unused simdjson_really_inline error_code skip_child() noexcept; - - /** - * Tell whether the iterator is at the EOF mark - */ - simdjson_really_inline bool at_end() const noexcept; - - /** - * Tell whether the iterator is at the start of the value - */ - simdjson_really_inline bool at_start() const noexcept; - - /** - * Tell whether the value is open--if the value has not been used, or the array/object is still open. - */ - simdjson_really_inline bool is_open() const noexcept; - - /** - * Tell whether the value is at an object's first field (just after the {). - */ - simdjson_really_inline bool at_first_field() const noexcept; - - /** - * Abandon all iteration. - */ - simdjson_really_inline void abandon() noexcept; - - /** - * Get the child value as a value_iterator. - */ - simdjson_really_inline value_iterator child_value() const noexcept; - - /** - * Get the depth of this value. - */ - simdjson_really_inline int32_t depth() const noexcept; - - /** - * Get the JSON type of this value. - * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". - */ - simdjson_really_inline simdjson_result type() const noexcept; - - /** - * @addtogroup object Object iteration - * - * Methods to iterate and find object fields. These methods generally *assume* the value is - * actually an object; the caller is responsible for keeping track of that fact. 
- * - * @{ - */ - - /** - * Start an object iteration. - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_object() noexcept; - /** - * Start an object iteration from the root. - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCORRECT_TYPE if there is no opening { - * @error TAPE_ERROR if there is no matching } at end of document - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_object() noexcept; - - /** - * Start an object iteration after the user has already checked and moved past the {. - * - * Does not move the iterator unless the object is empty ({}). - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_object() noexcept; - /** - * Start an object iteration from the root, after the user has already checked and moved past the {. - * - * Does not move the iterator unless the object is empty ({}). - * - * @returns Whether the object had any fields (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_object() noexcept; - - /** - * Moves to the next field in an object. - * - * Looks for , and }. If } is found, the object is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return whether there is another field in the object. - * @error TAPE_ERROR If there is a comma missing between fields. - * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. 
- */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_field() noexcept; - - /** - * Get the current field's key. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result field_key() noexcept; - - /** - * Pass the : in the field and move to its value. - */ - simdjson_warn_unused simdjson_really_inline error_code field_value() noexcept; - - /** - * Find the next field with the given key. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_really_inline error_code find_field(const std::string_view key) noexcept; - - /** - * Find the next field with the given key, *without* unescaping. This assumes object order: it - * will not find the field if it was already passed when looking for some *other* field. - * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_raw(const std::string_view key) noexcept; - - /** - * Find the field with the given key without regard to order, and *without* unescaping. - * - * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. 
- * - * Assumes you have called next_field() or otherwise matched the previous value. - * - * This means the iterator must be sitting at the next key: - * - * ``` - * { "a": 1, "b": 2 } - * ^ - * ``` - * - * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to - * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may - * fail to match some keys with escapes (\u, \n, etc.). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; - - /** @} */ - - /** - * @addtogroup array Array iteration - * Methods to iterate over array elements. These methods generally *assume* the value is actually - * an object; the caller is responsible for keeping track of that fact. - * @{ - */ - - /** - * Check for an opening [ and start an array iteration. - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_array() noexcept; - /** - * Check for an opening [ and start an array iteration while at the root. - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCORRECT_TYPE If there is no [. - * @error TAPE_ERROR if there is no matching ] at end of document - */ - simdjson_warn_unused simdjson_really_inline simdjson_result start_root_array() noexcept; - - /** - * Start an array iteration, after the user has already checked and moved past the [. - * - * Does not move the iterator unless the array is empty ([]). - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). 
- */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_array() noexcept; - /** - * Start an array iteration from the root, after the user has already checked and moved past the [. - * - * Does not move the iterator unless the array is empty ([]). - * - * @returns Whether the array had any elements (returns false for empty). - * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). - */ - simdjson_warn_unused simdjson_really_inline simdjson_result started_root_array() noexcept; - - /** - * Moves to the next element in an array. - * - * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. - * Otherwise, it advances to the next value. - * - * @return Whether there is another element in the array. - * @error TAPE_ERROR If there is a comma missing between elements. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result has_next_element() noexcept; - - /** - * Get a child value iterator. 
- */ - simdjson_warn_unused simdjson_really_inline value_iterator child() const noexcept; - - /** @} */ - - /** - * @defgroup scalar Scalar values - * @addtogroup scalar - * @{ - */ - - simdjson_warn_unused simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_negative() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; - - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_raw_json_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_uint64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_int64_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result 
get_root_double() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_double_in_string() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_bool() noexcept; - simdjson_warn_unused simdjson_really_inline bool is_root_negative() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result is_root_integer() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number_type() noexcept; - simdjson_warn_unused simdjson_really_inline simdjson_result get_root_number() noexcept; - simdjson_really_inline bool is_root_null() noexcept; - - simdjson_really_inline error_code error() const noexcept; - simdjson_really_inline uint8_t *&string_buf_loc() noexcept; - simdjson_really_inline const json_iterator &json_iter() const noexcept; - simdjson_really_inline json_iterator &json_iter() noexcept; - - simdjson_really_inline void assert_is_valid() const noexcept; - simdjson_really_inline bool is_valid() const noexcept; - - /** @} */ -protected: - /** - * Restarts an array iteration. - * @returns Whether the array has any elements (returns false for empty). - */ - simdjson_really_inline simdjson_result reset_array() noexcept; - /** - * Restarts an object iteration. - * @returns Whether the object has any fields (returns false for empty). - */ - simdjson_really_inline simdjson_result reset_object() noexcept; - /** - * move_at_start(): moves us so that we are pointing at the beginning of - * the container. It updates the index so that at_start() is true and it - * syncs the depth. The user can then create a new container instance. - * - * Usage: used with value::count_elements(). - **/ - simdjson_really_inline void move_at_start() noexcept; - - /** - * move_at_container_start(): moves us so that we are pointing at the beginning of - * the container so that assert_at_container_start() passes. - * - * Usage: used with reset_array() and reset_object(). 
- **/ - simdjson_really_inline void move_at_container_start() noexcept; - /* Useful for debugging and logging purposes. */ - inline std::string to_string() const noexcept; - simdjson_really_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; - - simdjson_really_inline bool parse_null(const uint8_t *json) const noexcept; - simdjson_really_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; - simdjson_really_inline const uint8_t *peek_start() const noexcept; - simdjson_really_inline uint32_t peek_start_length() const noexcept; - - /** - * The general idea of the advance_... methods and the peek_* methods - * is that you first peek and check that you have desired type. If you do, - * and only if you do, then you advance. - * - * We used to unconditionally advance. But this made reasoning about our - * current state difficult. - * Suppose you always advance. Look at the 'value' matching the key - * "shadowable" in the following example... - * - * ({"globals":{"a":{"shadowable":[}}}}) - * - * If the user thinks it is a Boolean and asks for it, then we check the '[', - * decide it is not a Boolean, but still move into the next character ('}'). Now - * we are left pointing at '}' right after a '['. And we have not yet reported - * an error, only that we do not have a Boolean. - * - * If, instead, you just stand your ground until it is content that you know, then - * you will only even move beyond the '[' if the user tells you that you have an - * array. So you will be at the '}' character inside the array and, hopefully, you - * will then catch the error because an array cannot start with '}', but the code - * processing Boolean values does not know this. - * - * So the contract is: first call 'peek_...' and then call 'advance_...' only - * if you have determined that it is a type you can handle. - * - * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
- */ - - simdjson_really_inline void advance_scalar(const char *type) noexcept; - simdjson_really_inline void advance_root_scalar(const char *type) noexcept; - simdjson_really_inline void advance_non_root_scalar(const char *type) noexcept; - - simdjson_really_inline const uint8_t *peek_scalar(const char *type) noexcept; - simdjson_really_inline const uint8_t *peek_root_scalar(const char *type) noexcept; - simdjson_really_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; - - - simdjson_really_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; - simdjson_really_inline error_code end_container() noexcept; - - /** - * Advance to a place expecting a value (increasing depth). - * - * @return The current token (the one left behind). - * @error TAPE_ERROR If the document ended early. - */ - simdjson_really_inline simdjson_result advance_to_value() noexcept; - - simdjson_really_inline error_code incorrect_type_error(const char *message) const noexcept; - simdjson_really_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; - - simdjson_really_inline bool is_at_start() const noexcept; - /** - * is_at_iterator_start() returns true on an array or object after it has just been - * created, whether the instance is empty or not. - * - * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) - */ - simdjson_really_inline bool is_at_iterator_start() const noexcept; - - /** - * Assuming that we are within an object, this returns true if we - * are pointing at a key. - * - * Usage: the skip_child() method should never be used while we are pointing - * at a key inside an object. 
- */ - simdjson_really_inline bool is_at_key() const noexcept; - - inline void assert_at_start() const noexcept; - inline void assert_at_container_start() const noexcept; - inline void assert_at_root() const noexcept; - inline void assert_at_child() const noexcept; - inline void assert_at_next() const noexcept; - inline void assert_at_non_root_start() const noexcept; - - /** Get the starting position of this value */ - simdjson_really_inline token_position start_position() const noexcept; - - /** @copydoc error_code json_iterator::position() const noexcept; */ - simdjson_really_inline token_position position() const noexcept; - /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position last_position() const noexcept; - /** @copydoc error_code json_iterator::end_position() const noexcept; */ - simdjson_really_inline token_position end_position() const noexcept; - /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ - simdjson_really_inline error_code report_error(error_code error, const char *message) noexcept; - - friend class document; - friend class object; - friend class array; - friend class value; -}; // value_iterator - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value_iterator.h */ -/* begin file include/simdjson/generic/ondemand/array_iterator.h */ - -namespace simdjson { -namespace 
SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class array; -class value; -class document; - -/** - * A forward-only JSON array. - * - * This is an input_iterator, meaning: - * - It is forward-only - * - * must be called exactly once per element. - * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) - */ -class array_iterator { -public: - /** Create a new, invalid array iterator. */ - simdjson_really_inline array_iterator() noexcept = default; - - // - // Iterator interface - // - - /** - * Get the current element. - * - * Part of the std::iterator interface. - */ - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - /** - * Check if we are at the end of the JSON. - * - * Part of the std::iterator interface. - * - * @return true if there are no more elements in the JSON array. - */ - simdjson_really_inline bool operator==(const array_iterator &) const noexcept; - /** - * Check if there are more elements in the JSON array. - * - * Part of the std::iterator interface. - * - * @return true if there are more elements in the JSON array. - */ - simdjson_really_inline bool operator!=(const array_iterator &) const noexcept; - /** - * Move to the next element. - * - * Part of the std::iterator interface. 
- */ - simdjson_really_inline array_iterator &operator++() noexcept; - -private: - value_iterator iter{}; - - simdjson_really_inline array_iterator(const value_iterator &iter) noexcept; - - friend class array; - friend class value; - friend struct simdjson_result; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - // - // Iterator interface - // - - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; - simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; - simdjson_really_inline simdjson_result &operator++() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array_iterator.h */ -/* begin file include/simdjson/generic/ondemand/object_iterator.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class field; - -class object_iterator { -public: - /** - * Create a new invalid object_iterator. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline object_iterator() noexcept = default; - - // - // Iterator interface - // - - // Reads key and value, yielding them to the user. - // MUST ONLY BE CALLED ONCE PER ITERATION. - simdjson_really_inline simdjson_result operator*() noexcept; - // Assumes it's being compared with the end. true if depth < iter->depth. 
- simdjson_really_inline bool operator==(const object_iterator &) const noexcept; - // Assumes it's being compared with the end. true if depth >= iter->depth. - simdjson_really_inline bool operator!=(const object_iterator &) const noexcept; - // Checks for ']' and ',' - simdjson_really_inline object_iterator &operator++() noexcept; - -private: - /** - * The underlying JSON iterator. - * - * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object - * is first used, and never changes afterwards. - */ - value_iterator iter{}; - - simdjson_really_inline object_iterator(const value_iterator &iter) noexcept; - friend struct simdjson_result; - friend class object; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - // - // Iterator interface - // - - // Reads key and value, yielding them to the user. - simdjson_really_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. - // Assumes it's being compared with the end. true if depth < iter->depth. - simdjson_really_inline bool operator==(const simdjson_result &) const noexcept; - // Assumes it's being compared with the end. true if depth >= iter->depth. 
- simdjson_really_inline bool operator!=(const simdjson_result &) const noexcept; - // Checks for ']' and ',' - simdjson_really_inline simdjson_result &operator++() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object_iterator.h */ -/* begin file include/simdjson/generic/ondemand/array.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class value; -class document; - -/** - * A forward-only JSON array. - */ -class array { -public: - /** - * Create a new invalid array. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline array() noexcept = default; - - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result begin() noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result end() noexcept; - /** - * This method scans the array and counts the number of elements. - * The count_elements method should always be called before you have begun - * iterating through the array: it is expected that you are pointing at - * the beginning of the array. - * The runtime complexity is linear in the size of the array. After - * calling this function, if successful, the array is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - * - * To check that an array is empty, it is more performant to use - * the is_empty() method. - */ - simdjson_really_inline simdjson_result count_elements() & noexcept; - /** - * This method scans the beginning of the array and checks whether the - * array is empty. - * The runtime complexity is constant time. 
After - * calling this function, if successful, the array is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - */ - simdjson_really_inline simdjson_result is_empty() & noexcept; - /** - * Reset the iterator so that we are pointing back at the - * beginning of the array. You should still consume values only once even if you - * can iterate through the array more than once. If you unescape a string - * within the array more than once, you have unsafe code. Note that rewinding - * an array means that you may need to reparse it anew: it is not a free - * operation. - * - * @returns true if the array contains some elements (not empty) - */ - inline simdjson_result reset() & noexcept; - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node - * as the root of its own JSON document. - * - * ondemand::parser parser; - * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("/0/foo/a/1") == 20 - * - * Note that at_pointer() called on the document automatically calls the document's rewind - * method between each call. It invalidates all previously accessed arrays, objects and values - * that have not been consumed. Yet it is not the case when calling at_pointer on an array - * instance: there is no rewind and no invalidation. - * - * You may only call at_pointer on an array after it has been created, but before it has - * been first accessed. When calling at_pointer on an array, the pointer is advanced to - * the location indicated by the JSON pointer (in case of success). It is no longer possible - * to call at_pointer on the same array. - * - * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. 
- * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - /** - * Consumes the array and returns a string_view instance corresponding to the - * array as represented in JSON. It points inside the original document. - */ - simdjson_really_inline simdjson_result raw_json() noexcept; - - /** - * Get the value at the given index. This function has linear-time complexity. - * This function should only be called once on an array instance since the array iterator is not reset between each call. - * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; -protected: - /** - * Go to the end of the array, no matter where you are right now. - */ - simdjson_really_inline error_code consume() noexcept; - - /** - * Begin array iteration. - * - * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the - * resulting array. - * @error INCORRECT_TYPE if the iterator is not at [. - */ - static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; - /** - * Begin array iteration from the root. - * - * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the - * resulting array. - * @error INCORRECT_TYPE if the iterator is not at [. - * @error TAPE_ERROR if there is no closing ] at the end of the document. - */ - static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; - /** - * Begin array iteration. 
- * - * This version of the method should be called after the initial [ has been verified, and is - * intended for use by switch statements that check the type of a value. - * - * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. - */ - static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; - - /** - * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. - * - * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() - * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* - * into the resulting array. - */ - simdjson_really_inline array(const value_iterator &iter) noexcept; - - /** - * Iterator marking current position. - * - * iter.is_alive() == false indicates iteration is complete. - */ - value_iterator iter{}; - - friend class value; - friend class document; - friend struct simdjson_result; - friend struct simdjson_result; - friend class array_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - inline simdjson_result count_elements() & noexcept; - inline simdjson_result is_empty() & noexcept; - inline simdjson_result reset() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result 
at_pointer(std::string_view json_pointer) noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array.h */ -/* begin file include/simdjson/generic/ondemand/document.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class parser; -class array; -class object; -class value; -class raw_json_string; -class array_iterator; -class document_stream; - -/** - * A JSON document. It holds a json_iterator instance. - * - * Used by tokens to get text, and string buffer location. - * - * You must keep the document around during iteration. - */ -class document { -public: - /** - * Create a new invalid document. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline document() noexcept = default; - simdjson_really_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy - simdjson_really_inline document(document &&other) noexcept = default; - simdjson_really_inline document &operator=(const document &other) noexcept = delete; - simdjson_really_inline document &operator=(document &&other) noexcept = default; - - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result get_array() & noexcept; - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @returns INCORRECT_TYPE If the JSON value is not an object. - */ - simdjson_really_inline simdjson_result get_object() & noexcept; - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. 
- */ - simdjson_really_inline simdjson_result get_uint64() noexcept; - /** - * Cast this JSON value (inside string) to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64() noexcept; - /** - * Cast this JSON value (inside string) to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double() noexcept; - - /** - * Cast this JSON value (inside string) to a double. - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Important: Calling get_string() twice on the same document is an error. - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_string() noexcept; - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. 
- * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @returns INCORRECT_TYPE if the JSON value is not true or false. - */ - simdjson_really_inline simdjson_result get_bool() noexcept; - /** - * Cast this JSON value to a value when the document is an object or an array. - * - * @returns A value if a JSON array or object cannot be found. - * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). - */ - simdjson_really_inline simdjson_result get_value() noexcept; - - /** - * Checks if this JSON value is null. - * - * @returns Whether the value is null. - */ - simdjson_really_inline bool is_null() noexcept; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * You may use get_double(), get_bool(), get_uint64(), get_int64(), - * get_object(), get_array(), get_raw_json_string(), or get_string() instead. - * - * @returns A value of the given type, parsed from the JSON. - * @returns INCORRECT_TYPE If the JSON value is not the given type. - */ - template simdjson_really_inline simdjson_result get() & noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); - } - /** @overload template simdjson_result get() & noexcept */ - template simdjson_really_inline simdjson_result get() && noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); - } - - /** - * Get this value as the given type. 
- * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value - * - * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. - * - * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. - * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. - */ - template simdjson_really_inline error_code get(T &out) & noexcept; - /** @overload template error_code get(T &out) & noexcept */ - template simdjson_really_inline error_code get(T &out) && noexcept; - -#if SIMDJSON_EXCEPTIONS - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. - */ - simdjson_really_inline operator array() & noexcept(false); - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. - */ - simdjson_really_inline operator object() & noexcept(false); - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline operator uint64_t() noexcept(false); - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline operator int64_t() noexcept(false); - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. 
- */ - simdjson_really_inline operator double() noexcept(false); - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator std::string_view() noexcept(false); - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator raw_json_string() noexcept(false); - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. - */ - simdjson_really_inline operator bool() noexcept(false); - /** - * Cast this JSON value to a value. - * - * @returns A value value. - * @exception if a JSON value cannot be found - */ - simdjson_really_inline operator value() noexcept(false); -#endif - /** - * This method scans the array and counts the number of elements. - * The count_elements method should always be called before you have begun - * iterating through the array: it is expected that you are pointing at - * the beginning of the array. - * The runtime complexity is linear in the size of the array. After - * calling this function, if successful, the array is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - */ - simdjson_really_inline simdjson_result count_elements() & noexcept; - /** - * This method scans the object and counts the number of key-value pairs. 
- * The count_fields method should always be called before you have begun - * iterating through the object: it is expected that you are pointing at - * the beginning of the object. - * The runtime complexity is linear in the size of the object. After - * calling this function, if successful, the object is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - * - * To check that an object is empty, it is more performant to use - * the is_empty() method. - */ - simdjson_really_inline simdjson_result count_fields() & noexcept; - /** - * Get the value at the given index in the array. This function has linear-time complexity. - * This function should only be called once on an array instance since the array iterator is not reset between each call. - * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result begin() & noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result end() & noexcept; - - /** - * Look up a field by name on an object (order-sensitive). - * - * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the - * JSON `{ "x": 1, "y": 2, "z": 3 }`: - * - * ```c++ - * simdjson::ondemand::parser parser; - * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); - * double z = obj.find_field("z"); - * double y = obj.find_field("y"); - * double x = obj.find_field("x"); - * ``` - * - * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. - * e.g. 
`object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. - * - * - * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array - * given by content["bids"].get_array() should not be accessed after you have called - * content["asks"].get_array(). You can detect such mistakes by first compiling and running - * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an - * OUT_OF_ORDER_ITERATION error is generated. - * - * You are expected to access keys only once. You should access the value corresponding to - * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() - * is an error. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. - */ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - - /** - * Look up a field by name on an object, without regard to key order. - * - * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies - * and often appears negligible. It starts out normally, starting out at the last field; but if - * the field is not found, it scans from the beginning of the object to see if it missed it. That - * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object - * in question is large. The fact that the extra code is there also bumps the executable size. - * - * It is the default, however, because it would be highly surprising (and hard to debug) if the - * default behavior failed to look up a field just because it was in the wrong order--and many - * APIs assume this. 
Therefore, you must be explicit if you want to treat objects as out of order. - * - * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). - * - * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array - * given by content["bids"].get_array() should not be accessed after you have called - * content["asks"].get_array(). You can detect such mistakes by first compiling and running - * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an - * OUT_OF_ORDER_ITERATION error is generated. - * - * You are expected to access keys only once. You should access the value corresponding to a key - * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() - * is an error. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. - */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - - /** - * Get the type of this JSON value. - * - * NOTE: If you're only expecting a value to be one type (a typical case), it's generally - * better to just call .get_double, .get_string, etc. 
and check for INCORRECT_TYPE (or just - * let it throw an exception). - * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". - */ - simdjson_really_inline simdjson_result type() noexcept; - - /** - * Checks whether the document is a scalar (string, number, null, Boolean). - * Returns false when there it is an array or object. - * - * @returns true if the type is string, number, null, Boolean - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". - */ - simdjson_really_inline simdjson_result is_scalar() noexcept; - - /** - * Checks whether the document is a negative number. - * - * @returns true if the number if negative. - */ - simdjson_really_inline bool is_negative() noexcept; - /** - * Checks whether the document is an integer number. Note that - * this requires to partially parse the number string. If - * the value is determined to be an integer, it may still - * not parse properly as an integer in subsequent steps - * (e.g., it might overflow). - * - * @returns true if the number if negative. - */ - simdjson_really_inline simdjson_result is_integer() noexcept; - /** - * Determine the number type (integer or floating-point number) as quickly - * as possible. This function does not fully validate the input. It is - * useful when you only need to classify the numbers, without parsing them. - * - * If you are planning to retrieve the value or you need full validation, - * consider using the get_number() method instead: it will fully parse - * and validate the input, and give you access to the type: - * get_number().get_number_type(). 
- * - * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 - * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number - * - * This function requires processing the number string, but it is expected - * to be faster than get_number().get_number_type() because it is does not - * parse the number value. - * - * @returns the type of the number - */ - simdjson_really_inline simdjson_result get_number_type() noexcept; - - /** - * Attempt to parse an ondemand::number. An ondemand::number may - * contain an integer value or a floating-point value, the simdjson - * library will autodetect the type. Thus it is a dynamically typed - * number. Before accessing the value, you must determine the detected - * type. - * - * number.get_number_type() is number_type::signed_integer if we have - * an integer in [-9223372036854775808,9223372036854775808) - * You can recover the value by calling number.get_int64() and you - * have that number.is_int64() is true. - * - * number.get_number_type() is number_type::unsigned_integer if we have - * an integer in [9223372036854775808,18446744073709551616) - * You can recover the value by calling number.get_uint64() and you - * have that number.is_uint64() is true. - * - * Otherwise, number.get_number_type() has value number_type::floating_point_number - * and we have a binary64 number. - * You can recover the value by calling number.get_double() and you - * have that number.is_double() is true. - * - * You must check the type before accessing the value: it is an error - * to call "get_int64()" when number.get_number_type() is not - * number_type::signed_integer and when number.is_int64() is false. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; - - /** - * Get the raw JSON for this token. 
- * - * The string_view will always point into the input buffer. - * - * The string_view will start at the beginning of the token, and include the entire token - * *as well as all spaces until the next token (or EOF).* This means, for example, that a - * string token always begins with a " and is always terminated by the final ", possibly - * followed by a number of spaces. - * - * The string_view is *not* null-terminated. If this is a scalar (string, number, - * boolean, or null), the character after the end of the string_view may be the padded buffer. - * - * Tokens include: - * - { - * - [ - * - "a string (possibly with UTF-8 or backslashed characters like \\\")". - * - -1.2e-100 - * - true - * - false - * - null - */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - /** - * Reset the iterator inside the document instance so we are pointing back at the - * beginning of the document, as if it had just been created. It invalidates all - * values, objects and arrays that you have created so far (including unescaped strings). - */ - inline void rewind() noexcept; - /** - * Returns debugging information. - */ - inline std::string to_debug_string() noexcept; - /** - * Some unrecoverable error conditions may render the document instance unusable. - * The is_alive() method returns true when the document is still suitable. - */ - inline bool is_alive() noexcept; - - /** - * Returns the current location in the document if in bounds. - */ - inline simdjson_result current_location() noexcept; - - /** - * Returns the current depth in the document if in bounds. - * - * E.g., - * 0 = finished with document - * 1 = document root value (could be [ or {, not yet known) - * 2 = , or } inside root array/object - * 3 = key or value inside root array/object. - */ - simdjson_really_inline int32_t current_depth() const noexcept; - - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard. 
- * - * ondemand::parser parser; - * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("/foo/a/1") == 20 - * - * It is allowed for a key to be the empty string: - * - * ondemand::parser parser; - * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("//a/1") == 20 - * - * Note that at_pointer() automatically calls rewind between each call. Thus - * all values, objects and arrays that you have created so far (including unescaped strings) - * are invalidated. After calling at_pointer, you need to consume the result: string values - * should be stored in your own variables, arrays should be decoded and stored in your own array-like - * structures and so forth. - * - * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). - */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - /** - * Consumes the document and returns a string_view instance corresponding to the - * document as represented in JSON. It points inside the original byte array containing - * the JSON document. - */ - simdjson_really_inline simdjson_result raw_json() noexcept; -protected: - /** - * Consumes the document. 
- */ - simdjson_really_inline error_code consume() noexcept; - - simdjson_really_inline document(ondemand::json_iterator &&iter) noexcept; - simdjson_really_inline const uint8_t *text(uint32_t idx) const noexcept; - - simdjson_really_inline value_iterator resume_value_iterator() noexcept; - simdjson_really_inline value_iterator get_root_value_iterator() noexcept; - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; - static simdjson_really_inline document start(ondemand::json_iterator &&iter) noexcept; - - // - // Fields - // - json_iterator iter{}; ///< Current position in the document - static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 - - friend class array_iterator; - friend class value; - friend class ondemand::parser; - friend class object; - friend class array; - friend class field; - friend class token; - friend class document_stream; -}; - - -/** - * A document_reference is a thin wrapper around a document reference instance. - */ -class document_reference { -public: - simdjson_really_inline document_reference() noexcept; - simdjson_really_inline document_reference(document &d) noexcept; - simdjson_really_inline document_reference(const document_reference &other) noexcept = default; - simdjson_really_inline document_reference& operator=(const document_reference &other) noexcept = default; - simdjson_really_inline void rewind() noexcept; - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() 
noexcept; - - simdjson_really_inline bool is_null() noexcept; - simdjson_really_inline simdjson_result raw_json() noexcept; - simdjson_really_inline operator document&() const noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator array() & noexcept(false); - simdjson_really_inline operator object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator value() noexcept(false); -#endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() 
noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - simdjson_really_inline simdjson_result raw_json_token() noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; -private: - document *doc{nullptr}; -}; -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; - - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_from_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; - - template simdjson_really_inline simdjson_result get() & noexcept; - template simdjson_really_inline simdjson_result get() && noexcept; - - template simdjson_really_inline error_code get(T &out) & noexcept; - template simdjson_really_inline error_code get(T &out) && noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); -#endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; 
- simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; -}; - - -} // namespace simdjson - - - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; - simdjson_really_inline simdjson_result() noexcept = default; - simdjson_really_inline error_code rewind() noexcept; - - simdjson_really_inline simdjson_result get_array() & noexcept; - simdjson_really_inline simdjson_result get_object() & noexcept; - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline simdjson_result get_value() noexcept; - simdjson_really_inline bool is_null() noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() 
noexcept(false); - simdjson_really_inline operator bool() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); -#endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) & noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(const char *key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](const char *key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(const char *key) & noexcept; - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result current_location() noexcept; - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline bool is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - /** @copydoc simdjson_really_inline std::string_view document_reference::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; -}; - - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/document.h */ -/* begin file include/simdjson/generic/ondemand/value.h */ - -namespace simdjson { 
-namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class array; -class document; -class field; -class object; -class raw_json_string; - -/** - * An ephemeral JSON value returned during iteration. - */ -class value { -public: - /** - * Create a new invalid value. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline value() noexcept = default; - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * You may use get_double(), get_bool(), get_uint64(), get_int64(), - * get_object(), get_array(), get_raw_json_string(), or get_string() instead. - * - * @returns A value of the given type, parsed from the JSON. - * @returns INCORRECT_TYPE If the JSON value is not the given type. - */ - template simdjson_really_inline simdjson_result get() noexcept { - // Unless the simdjson library provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); - } - - /** - * Get this value as the given type. - * - * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool - * - * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. - * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. - */ - template simdjson_really_inline error_code get(T &out) noexcept; - - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result get_array() noexcept; - - /** - * Cast this JSON value to an object. 
- * - * @returns An object that can be used to look up or iterate fields. - * @returns INCORRECT_TYPE If the JSON value is not an object. - */ - simdjson_really_inline simdjson_result get_object() noexcept; - - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A unsigned 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline simdjson_result get_uint64() noexcept; - - /** - * Cast this JSON value (inside string) to a unsigned integer. - * - * @returns A unsigned 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64() noexcept; - - /** - * Cast this JSON value (inside string) to a signed integer. - * - * @returns A signed 64-bit integer. - * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. - */ - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double() noexcept; - - /** - * Cast this JSON value (inside string) to a double - * - * @returns A double. - * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline simdjson_result get_double_in_string() noexcept; - - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * Important: a value should be consumed once. Calling get_string() twice on the same value - * is an error. - * - * @returns An UTF-8 string. 
The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_string() noexcept; - - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @returns INCORRECT_TYPE if the JSON value is not a string. - */ - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @returns INCORRECT_TYPE if the JSON value is not true or false. - */ - simdjson_really_inline simdjson_result get_bool() noexcept; - - /** - * Checks if this JSON value is null. - * - * @returns Whether the value is null. - */ - simdjson_really_inline bool is_null() noexcept; - -#if SIMDJSON_EXCEPTIONS - /** - * Cast this JSON value to an array. - * - * @returns An object that can be used to iterate the array. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. - */ - simdjson_really_inline operator array() noexcept(false); - /** - * Cast this JSON value to an object. - * - * @returns An object that can be used to look up or iterate fields. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. - */ - simdjson_really_inline operator object() noexcept(false); - /** - * Cast this JSON value to an unsigned integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. - */ - simdjson_really_inline operator uint64_t() noexcept(false); - /** - * Cast this JSON value to a signed integer. - * - * @returns A signed 64-bit integer. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. 
- */ - simdjson_really_inline operator int64_t() noexcept(false); - /** - * Cast this JSON value to a double. - * - * @returns A double. - * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. - */ - simdjson_really_inline operator double() noexcept(false); - /** - * Cast this JSON value to a string. - * - * The string is guaranteed to be valid UTF-8. - * - * Equivalent to get(). - * - * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next - * time it parses a document or when it is destroyed. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator std::string_view() noexcept(false); - /** - * Cast this JSON value to a raw_json_string. - * - * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). - * - * @returns A pointer to the raw JSON for the given string. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. - */ - simdjson_really_inline operator raw_json_string() noexcept(false); - /** - * Cast this JSON value to a bool. - * - * @returns A bool value. - * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. - */ - simdjson_really_inline operator bool() noexcept(false); -#endif - - /** - * Begin array iteration. - * - * Part of the std::iterable interface. - * - * @returns INCORRECT_TYPE If the JSON value is not an array. - */ - simdjson_really_inline simdjson_result begin() & noexcept; - /** - * Sentinel representing the end of the array. - * - * Part of the std::iterable interface. - */ - simdjson_really_inline simdjson_result end() & noexcept; - /** - * This method scans the array and counts the number of elements. - * The count_elements method should always be called before you have begun - * iterating through the array: it is expected that you are pointing at - * the beginning of the array. 
- * The runtime complexity is linear in the size of the array. After - * calling this function, if successful, the array is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - */ - simdjson_really_inline simdjson_result count_elements() & noexcept; - /** - * This method scans the object and counts the number of key-value pairs. - * The count_fields method should always be called before you have begun - * iterating through the object: it is expected that you are pointing at - * the beginning of the object. - * The runtime complexity is linear in the size of the object. After - * calling this function, if successful, the object is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - * - * To check that an object is empty, it is more performant to use - * the is_empty() method on the object instance. - */ - simdjson_really_inline simdjson_result count_fields() & noexcept; - /** - * Get the value at the given index in the array. This function has linear-time complexity. - * This function should only be called once on an array instance since the array iterator is not reset between each call. - * - * @return The value at the given index, or: - * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length - */ - simdjson_really_inline simdjson_result at(size_t index) noexcept; - /** - * Look up a field by name on an object (order-sensitive). 
- * - * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the - * JSON `{ "x": 1, "y": 2, "z": 3 }`: - * - * ```c++ - * simdjson::ondemand::parser parser; - * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); - * double z = obj.find_field("z"); - * double y = obj.find_field("y"); - * double x = obj.find_field("x"); - * ``` - * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful - * that only one field is returned. - - * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. - * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. - */ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) noexcept; - - /** - * Look up a field by name on an object, without regard to key order. - * - * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies - * and often appears negligible. It starts out normally, starting out at the last field; but if - * the field is not found, it scans from the beginning of the object to see if it missed it. That - * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object - * in question is large. The fact that the extra code is there also bumps the executable size. - * - * It is the default, however, because it would be highly surprising (and hard to debug) if the - * default behavior failed to look up a field just because it was in the wrong order--and many - * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. 
- * - * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful - * that only one field is returned. - * - * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. - */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) noexcept; - - /** - * Get the type of this JSON value. - * - * NOTE: If you're only expecting a value to be one type (a typical case), it's generally - * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just - * let it throw an exception). - * - * @return The type of JSON value (json_type::array, json_type::object, json_type::string, - * json_type::number, json_type::boolean, or json_type::null). - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". - */ - simdjson_really_inline simdjson_result type() noexcept; - - /** - * Checks whether the value is a scalar (string, number, null, Boolean). - * Returns false when there it is an array or object. - * - * @returns true if the type is string, number, null, Boolean - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
- */ - simdjson_really_inline simdjson_result is_scalar() noexcept; - - /** - * Checks whether the value is a negative number. - * - * @returns true if the number if negative. - */ - simdjson_really_inline bool is_negative() noexcept; - /** - * Checks whether the value is an integer number. Note that - * this requires to partially parse the number string. If - * the value is determined to be an integer, it may still - * not parse properly as an integer in subsequent steps - * (e.g., it might overflow). - * - * Performance note: if you call this function systematically - * before parsing a number, you may have fallen for a performance - * anti-pattern. - * - * @returns true if the number if negative. - */ - simdjson_really_inline simdjson_result is_integer() noexcept; - /** - * Determine the number type (integer or floating-point number) as quickly - * as possible. This function does not fully validate the input. It is - * useful when you only need to classify the numbers, without parsing them. - * - * If you are planning to retrieve the value or you need full validation, - * consider using the get_number() method instead: it will fully parse - * and validate the input, and give you access to the type: - * get_number().get_number_type(). - * - * get_number_type() is number_type::unsigned_integer if we have - * an integer greater or equal to 9223372036854775808 - * get_number_type() is number_type::signed_integer if we have an - * integer that is less than 9223372036854775808 - * Otherwise, get_number_type() has value number_type::floating_point_number - * - * This function requires processing the number string, but it is expected - * to be faster than get_number().get_number_type() because it is does not - * parse the number value. - * - * @returns the type of the number - */ - simdjson_really_inline simdjson_result get_number_type() noexcept; - - /** - * Attempt to parse an ondemand::number. 
An ondemand::number may - * contain an integer value or a floating-point value, the simdjson - * library will autodetect the type. Thus it is a dynamically typed - * number. Before accessing the value, you must determine the detected - * type. - * - * number.get_number_type() is number_type::signed_integer if we have - * an integer in [-9223372036854775808,9223372036854775808) - * You can recover the value by calling number.get_int64() and you - * have that number.is_int64() is true. - * - * number.get_number_type() is number_type::unsigned_integer if we have - * an integer in [9223372036854775808,18446744073709551616) - * You can recover the value by calling number.get_uint64() and you - * have that number.is_uint64() is true. - * - * Otherwise, number.get_number_type() has value number_type::floating_point_number - * and we have a binary64 number. - * You can recover the value by calling number.get_double() and you - * have that number.is_double() is true. - * - * You must check the type before accessing the value: it is an error - * to call "get_int64()" when number.get_number_type() is not - * number_type::signed_integer and when number.is_int64() is false. - * - * Performance note: this is designed with performance in mind. When - * calling 'get_number()', you scan the number string only once, determining - * efficiently the type and storing it in an efficient manner. - */ - simdjson_warn_unused simdjson_really_inline simdjson_result get_number() noexcept; - - - /** - * Get the raw JSON for this token. - * - * The string_view will always point into the input buffer. - * - * The string_view will start at the beginning of the token, and include the entire token - * *as well as all spaces until the next token (or EOF).* This means, for example, that a - * string token always begins with a " and is always terminated by the final ", possibly - * followed by a number of spaces. - * - * The string_view is *not* null-terminated. 
However, if this is a scalar (string, number, - * boolean, or null), the character after the end of the string_view is guaranteed to be - * a non-space token. - * - * Tokens include: - * - { - * - [ - * - "a string (possibly with UTF-8 or backslashed characters like \\\")". - * - -1.2e-100 - * - true - * - false - * - null - */ - simdjson_really_inline std::string_view raw_json_token() noexcept; - - /** - * Returns the current location in the document if in bounds. - */ - simdjson_really_inline simdjson_result current_location() noexcept; - - /** - * Returns the current depth in the document if in bounds. - * - * E.g., - * 0 = finished with document - * 1 = document root value (could be [ or {, not yet known) - * 2 = , or } inside root array/object - * 3 = key or value inside root array/object. - */ - simdjson_really_inline int32_t current_depth() const noexcept; - - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard. - * - * ondemand::parser parser; - * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("/foo/a/1") == 20 - * - * It is allowed for a key to be the empty string: - * - * ondemand::parser parser; - * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("//a/1") == 20 - * - * Note that at_pointer() called on the document automatically calls the document's rewind - * method between each call. It invalidates all previously accessed arrays, objects and values - * that have not been consumed. - * - * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not - * standardized (by RFC 6901). We provide some experimental support for JSON pointers - * on non-document instances. Yet it is not the case when calling at_pointer on an array - * or an object instance: there is no rewind and no invalidation. 
- * - * You may only call at_pointer on an array after it has been created, but before it has - * been first accessed. When calling at_pointer on an array, the pointer is advanced to - * the location indicated by the JSON pointer (in case of success). It is no longer possible - * to call at_pointer on the same array. - * - * You may call at_pointer more than once on an object, but each time the pointer is advanced - * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding - * key (as well as the current key) can no longer be used with following JSON pointer calls. - * - * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - -protected: - /** - * Create a value. - */ - simdjson_really_inline value(const value_iterator &iter) noexcept; - - /** - * Skip this value, allowing iteration to continue. - */ - simdjson_really_inline void skip() noexcept; - - /** - * Start a value at the current position. - * - * (It should already be started; this is just a self-documentation method.) - */ - static simdjson_really_inline value start(const value_iterator &iter) noexcept; - - /** - * Resume a value. 
- */ - static simdjson_really_inline value resume(const value_iterator &iter) noexcept; - - /** - * Get the object, starting or resuming it as necessary - */ - simdjson_really_inline simdjson_result start_or_resume_object() noexcept; - - // simdjson_really_inline void log_value(const char *type) const noexcept; - // simdjson_really_inline void log_error(const char *message) const noexcept; - - value_iterator iter{}; - - friend class document; - friend class array_iterator; - friend class field; - friend class object; - friend struct simdjson_result; - friend struct simdjson_result; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - simdjson_really_inline simdjson_result get_array() noexcept; - simdjson_really_inline simdjson_result get_object() noexcept; - - simdjson_really_inline simdjson_result get_uint64() noexcept; - simdjson_really_inline simdjson_result get_uint64_in_string() noexcept; - simdjson_really_inline simdjson_result get_int64() noexcept; - simdjson_really_inline simdjson_result get_int64_in_string() noexcept; - simdjson_really_inline simdjson_result get_double() noexcept; - simdjson_really_inline simdjson_result get_double_in_string() noexcept; - simdjson_really_inline simdjson_result get_string() noexcept; - simdjson_really_inline simdjson_result get_raw_json_string() noexcept; - simdjson_really_inline simdjson_result get_bool() noexcept; - simdjson_really_inline bool is_null() noexcept; - - template simdjson_really_inline simdjson_result get() noexcept; - - template 
simdjson_really_inline error_code get(T &out) noexcept; - -#if SIMDJSON_EXCEPTIONS - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); - simdjson_really_inline operator uint64_t() noexcept(false); - simdjson_really_inline operator int64_t() noexcept(false); - simdjson_really_inline operator double() noexcept(false); - simdjson_really_inline operator std::string_view() noexcept(false); - simdjson_really_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); - simdjson_really_inline operator bool() noexcept(false); -#endif - simdjson_really_inline simdjson_result count_elements() & noexcept; - simdjson_really_inline simdjson_result count_fields() & noexcept; - simdjson_really_inline simdjson_result at(size_t index) noexcept; - simdjson_really_inline simdjson_result begin() & noexcept; - simdjson_really_inline simdjson_result end() & noexcept; - - /** - * Look up a field by name on an object (order-sensitive). - * - * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the - * JSON `{ "x": 1, "y": 2, "z": 3 }`: - * - * ```c++ - * simdjson::ondemand::parser parser; - * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); - * double z = obj.find_field("z"); - * double y = obj.find_field("y"); - * double x = obj.find_field("x"); - * ``` - * - * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. - * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
- */ - simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field(const char *key) noexcept; - - /** - * Look up a field by name on an object, without regard to key order. - * - * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies - * and often appears negligible. It starts out normally, starting out at the last field; but if - * the field is not found, it scans from the beginning of the object to see if it missed it. That - * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object - * in question is large. The fact that the extra code is there also bumps the executable size. - * - * It is the default, however, because it would be highly surprising (and hard to debug) if the - * default behavior failed to look up a field just because it was in the wrong order--and many - * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. - * - * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
- */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(const char *key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ - simdjson_really_inline simdjson_result operator[](const char *key) noexcept; - - /** - * Get the type of this JSON value. - * - * NOTE: If you're only expecting a value to be one type (a typical case), it's generally - * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just - * let it throw an exception). - */ - simdjson_really_inline simdjson_result type() noexcept; - simdjson_really_inline simdjson_result is_scalar() noexcept; - simdjson_really_inline simdjson_result is_negative() noexcept; - simdjson_really_inline simdjson_result is_integer() noexcept; - simdjson_really_inline simdjson_result get_number_type() noexcept; - simdjson_really_inline simdjson_result get_number() noexcept; - - /** @copydoc simdjson_really_inline std::string_view value::raw_json_token() const noexcept */ - simdjson_really_inline simdjson_result raw_json_token() noexcept; - - /** @copydoc simdjson_really_inline simdjson_result current_location() noexcept */ - simdjson_really_inline simdjson_result current_location() noexcept; - /** @copydoc simdjson_really_inline int32_t current_depth() const noexcept */ - simdjson_really_inline int32_t current_depth() const noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value.h */ -/* begin 
file include/simdjson/generic/ondemand/field.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -/** - * A JSON field (key/value pair) in an object. - * - * Returned from object iteration. - * - * Extends from std::pair so you can use C++ algorithms that rely on pairs. - */ -class field : public std::pair { -public: - /** - * Create a new invalid field. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline field() noexcept; - - /** - * Get the key as a string_view (for higher speed, consider raw_key). - * We deliberately use a more cumbersome name (unescaped_key) to force users - * to think twice about using it. - * - * This consumes the key: once you have called unescaped_key(), you cannot - * call it again nor can you call key(). - */ - simdjson_really_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; - /** - * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". - */ - simdjson_really_inline raw_json_string key() const noexcept; - /** - * Get the field value. 
- */ - simdjson_really_inline ondemand::value &value() & noexcept; - /** - * @overload ondemand::value &ondemand::value() & noexcept - */ - simdjson_really_inline ondemand::value value() && noexcept; - -protected: - simdjson_really_inline field(raw_json_string key, ondemand::value &&value) noexcept; - static simdjson_really_inline simdjson_result start(value_iterator &parent_iter) noexcept; - static simdjson_really_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; - friend struct simdjson_result; - friend class object_iterator; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - simdjson_really_inline simdjson_result unescaped_key() noexcept; - simdjson_really_inline simdjson_result key() noexcept; - simdjson_really_inline simdjson_result value() noexcept; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/field.h */ -/* begin file include/simdjson/generic/ondemand/object.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -/** - * A forward-only JSON object field iterator. - */ -class object { -public: - /** - * Create a new invalid object. - * - * Exists so you can declare a variable and later assign to it before use. - */ - simdjson_really_inline object() noexcept = default; - - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - /** - * Look up a field by name on an object (order-sensitive). 
- * - * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the - * JSON `{ "x": 1, "y": 2, "z": 3 }`: - * - * ```c++ - * simdjson::ondemand::parser parser; - * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); - * double z = obj.find_field("z"); - * double y = obj.find_field("y"); - * double x = obj.find_field("x"); - * ``` - * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful - * that only one field is returned. - * - * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. - * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. - * - * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array - * given by content["bids"].get_array() should not be accessed after you have called - * content["asks"].get_array(). You can detect such mistakes by first compiling and running - * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an - * OUT_OF_ORDER_ITERATION error is generated. - * - * You are expected to access keys only once. You should access the value corresponding to a - * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() - * is an error. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. - */ - simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; - - /** - * Look up a field by name on an object, without regard to key order. 
- * - * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies - * and often appears negligible. It starts out normally, starting out at the last field; but if - * the field is not found, it scans from the beginning of the object to see if it missed it. That - * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object - * in question is large. The fact that the extra code is there also bumps the executable size. - * - * It is the default, however, because it would be highly surprising (and hard to debug) if the - * default behavior failed to look up a field just because it was in the wrong order--and many - * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. - * - * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the - * field wasn't there when they aren't). - * - * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful - * that only one field is returned. - * - * You must consume the fields on an object one at a time. A request for a new key - * invalidates previous field values: it makes them unsafe. E.g., the array - * given by content["bids"].get_array() should not be accessed after you have called - * content["asks"].get_array(). You can detect such mistakes by first compiling and running - * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an - * OUT_OF_ORDER_ITERATION error is generated. - * - * You are expected to access keys only once. You should access the value corresponding to a key - * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. - * - * @param key The key to look up. - * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
- */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - /** @overload simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; - - /** - * Get the value associated with the given JSON pointer. We use the RFC 6901 - * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node - * as the root of its own JSON document. - * - * ondemand::parser parser; - * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("/foo/a/1") == 20 - * - * It is allowed for a key to be the empty string: - * - * ondemand::parser parser; - * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; - * auto doc = parser.iterate(json); - * doc.at_pointer("//a/1") == 20 - * - * Note that at_pointer() called on the document automatically calls the document's rewind - * method between each call. It invalidates all previously accessed arrays, objects and values - * that have not been consumed. Yet it is not the case when calling at_pointer on an object - * instance: there is no rewind and no invalidation. - * - * You may call at_pointer more than once on an object, but each time the pointer is advanced - * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding - * key (as well as the current key) can no longer be used with following JSON pointer calls. 
- * - * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. - * - * @return The value associated with the given JSON pointer, or: - * - NO_SUCH_FIELD if a field does not exist in an object - * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length - * - INCORRECT_TYPE if a non-integer is used to access an array - * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed - */ - inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - - /** - * Reset the iterator so that we are pointing back at the - * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object - * means that you may need to reparse it anew: it is not a free operation. - * - * @returns true if the object contains some elements (not empty) - */ - inline simdjson_result reset() & noexcept; - /** - * This method scans the beginning of the object and checks whether the - * object is empty. - * The runtime complexity is constant time. After - * calling this function, if successful, the object is 'rewinded' at its - * beginning as if it had never been accessed. If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - */ - inline simdjson_result is_empty() & noexcept; - /** - * This method scans the object and counts the number of key-value pairs. - * The count_fields method should always be called before you have begun - * iterating through the object: it is expected that you are pointing at - * the beginning of the object. - * The runtime complexity is linear in the size of the object. After - * calling this function, if successful, the object is 'rewinded' at its - * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., - * there is a missing comma), then an error is returned and it is no longer - * safe to continue. - * - * To check that an object is empty, it is more performant to use - * the is_empty() method. - */ - simdjson_really_inline simdjson_result count_fields() & noexcept; - /** - * Consumes the object and returns a string_view instance corresponding to the - * object as represented in JSON. It points inside the original byte array containing - * the JSON document. - */ - simdjson_really_inline simdjson_result raw_json() noexcept; - -protected: - /** - * Go to the end of the object, no matter where you are right now. - */ - simdjson_really_inline error_code consume() noexcept; - static simdjson_really_inline simdjson_result start(value_iterator &iter) noexcept; - static simdjson_really_inline simdjson_result start_root(value_iterator &iter) noexcept; - static simdjson_really_inline simdjson_result started(value_iterator &iter) noexcept; - static simdjson_really_inline object resume(const value_iterator &iter) noexcept; - simdjson_really_inline object(const value_iterator &iter) noexcept; - - simdjson_warn_unused simdjson_really_inline error_code find_field_raw(const std::string_view key) noexcept; - - value_iterator iter{}; - - friend class value; - friend class document; - friend struct simdjson_result; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; - - simdjson_really_inline simdjson_result begin() noexcept; - simdjson_really_inline simdjson_result end() noexcept; - 
simdjson_really_inline simdjson_result find_field(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; - simdjson_really_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) & noexcept; - simdjson_really_inline simdjson_result operator[](std::string_view key) && noexcept; - simdjson_really_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; - inline simdjson_result reset() noexcept; - inline simdjson_result is_empty() noexcept; - inline simdjson_result count_fields() & noexcept; - -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object.h */ -/* begin file include/simdjson/generic/ondemand/parser.h */ - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class array; -class object; -class value; -class raw_json_string; -class document_stream; - -/** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value - * in the future. - */ -static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; -/** - * Some adversary might try to set the batch size to 0 or 1, which might cause problems. - * We set a minimum of 32B since anything else is highly likely to be an error. In practice, - * most users will want a much larger batch size. - * - * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON - * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. - */ -static constexpr size_t MINIMAL_BATCH_SIZE = 32; - -/** - * A JSON fragment iterator. - * - * This holds the actual iterator as well as the buffer for writing strings. 
- */ -class parser { -public: - /** - * Create a JSON parser. - * - * The new parser will have zero capacity. - */ - inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; - - inline parser(parser &&other) noexcept = default; - simdjson_really_inline parser(const parser &other) = delete; - simdjson_really_inline parser &operator=(const parser &other) = delete; - simdjson_really_inline parser &operator=(parser &&other) noexcept = default; - - /** Deallocate the JSON parser. */ - inline ~parser() noexcept = default; - - /** - * Start iterating an on-demand JSON document. - * - * ondemand::parser parser; - * document doc = parser.iterate(json); - * - * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. - * Otherwise the iterate method may return an error. In particular, the whole input should be - * valid: we do not attempt to tolerate incorrect content either before or after a JSON - * document. - * - * ### IMPORTANT: Validate what you use - * - * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to - * iterate does not parse and validate the whole document. - * - * ### IMPORTANT: Buffer Lifetime - * - * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as - * long as the document iteration. - * - * ### IMPORTANT: Document Lifetime - * - * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during - * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before - * you call parse() again or destroy the parser. - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * @param json The JSON to parse. - * @param len The length of the JSON. 
- * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). - * - * @return The document, or an error: - * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. - * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory - * allocation fails. - * - EMPTY if the document is all whitespace. - * - UTF8_ERROR if the document is not valid UTF-8. - * - UNESCAPED_CHARS if a string contains control characters that must be escaped - * - UNCLOSED_STRING if there is an unclosed string in the document. - */ - simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ - simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; - - /** - * @private - * - * Start iterating an on-demand JSON document. 
- * - * ondemand::parser parser; - * json_iterator doc = parser.iterate(json); - * - * ### IMPORTANT: Buffer Lifetime - * - * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as - * long as the document iteration. - * - * ### IMPORTANT: Document Lifetime - * - * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during - * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before - * you call parse() again or destroy the parser. - * - * The ondemand::document instance holds the iterator. The document must remain in scope - * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * @param json The JSON to parse. - * - * @return The iterator, or an error: - * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. - * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory - * allocation fails. - * - EMPTY if the document is all whitespace. - * - UTF8_ERROR if the document is not valid UTF-8. - * - UNESCAPED_CHARS if a string contains control characters that must be escaped - * - UNCLOSED_STRING if there is an unclosed string in the document. - */ - simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; - - - /** - * Parse a buffer containing many JSON documents. - * - * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; - * ondemand::parser parser; - * ondemand::document_stream docs = parser.iterate_many(json); - * for (auto & doc : docs) { - * std::cout << doc["foo"] << std::endl; - * } - * // Prints 1 2 3 - * - * No copy of the input buffer is made. 
- * - * The function is lazy: it may be that no more than one JSON document at a time is parsed. - * - * The caller is responsabile to ensure that the input string data remains unchanged and is - * not deleted during the loop. - * - * ### Format - * - * The buffer must contain a series of one or more JSON documents, concatenated into a single - * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, - * then starts parsing the next document at that point. (It does this with more parallelism and - * lookahead than you might think, though.) - * - * documents that consist of an object or array may omit the whitespace between them, concatenating - * with no separator. Documents that consist of a single primitive (i.e. documents that are not - * arrays or objects) MUST be separated with ASCII whitespace. - * - * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). - * - * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excessively small values may impact negatively the - * performance. - * - * ### REQUIRED: Buffer Padding - * - * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what - * those bytes are initialized to, as long as they are allocated. - * - * ### Threads - * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. - * - * ### Parser Capacity - * - * If the parser's current capacity is less than batch_size, it will allocate enough capacity - * to handle it (up to max_capacity). - * - * @param buf The concatenated JSON to parse. - * @param len The length of the concatenated JSON. - * @param batch_size The batch size to use. MUST be larger than the largest document. 
The sweet - * spot is cache-related: small enough to fit in cache, yet big enough to - * parse as many documents as possible in one tight loop. - * Defaults to 10MB, which has been a reasonable sweet spot in our tests. - * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: - * - MEMALLOC if the parser does not have enough capacity and memory allocation fails - * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. - * - other json errors if parsing fails. You should not rely on these errors to always the same for the - * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). - */ - inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size) = delete;// unsafe - /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ - inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size) = delete;// unsafe - - /** @private We do not want to allow implicit conversion from C string to std::string. */ - simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; - - /** The capacity of this parser (the largest document it can process). 
*/ - simdjson_really_inline size_t capacity() const noexcept; - /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_really_inline size_t max_capacity() const noexcept; - simdjson_really_inline void set_max_capacity(size_t max_capacity) noexcept; - /** The maximum depth of this parser (the most deeply nested objects and arrays it can process). */ - simdjson_really_inline size_t max_depth() const noexcept; - - /** - * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length - * and `max_depth` depth. - * - * @param capacity The new capacity. - * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. - * @return The error, if there is one. - */ - simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; - - #ifdef SIMDJSON_THREADS_ENABLED - /** - * The parser instance can use threads when they are available to speed up some - * operations. It is enabled by default. Changing this attribute will change the - * behavior of the parser for future operations. 
- */ - bool threaded{true}; - #endif - -private: - /** @private [for benchmarking access] The implementation to use */ - std::unique_ptr implementation{}; - size_t _capacity{0}; - size_t _max_capacity; - size_t _max_depth{DEFAULT_MAX_DEPTH}; - std::unique_ptr string_buf{}; -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - std::unique_ptr start_positions{}; -#endif - - friend class json_iterator; - friend class document_stream; -}; - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/parser.h */ -/* begin file include/simdjson/generic/ondemand/document_stream.h */ -#ifdef SIMDJSON_THREADS_ENABLED -#include -#include -#include -#endif - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -class parser; -class json_iterator; -class document; - -#ifdef SIMDJSON_THREADS_ENABLED -/** @private Custom worker class **/ -struct stage1_worker { - stage1_worker() noexcept = default; - stage1_worker(const stage1_worker&) = delete; - stage1_worker(stage1_worker&&) = delete; - stage1_worker operator=(const stage1_worker&) = delete; - ~stage1_worker(); - /** - * We only start the thread when it is needed, not at object construction, this may throw. - * You should only call this once. - **/ - void start_thread(); - /** - * Start a stage 1 job. You should first call 'run', then 'finish'. - * You must call start_thread once before. 
- */ - void run(document_stream * ds, parser * stage1, size_t next_batch_start); - /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ - void finish(); - -private: - - /** - * Normally, we would never stop the thread. But we do in the destructor. - * This function is only safe assuming that you are not waiting for results. You - * should have called run, then finish, and be done. - **/ - void stop_thread(); - - std::thread thread{}; - /** These three variables define the work done by the thread. **/ - ondemand::parser * stage1_thread_parser{}; - size_t _next_batch_start{}; - document_stream * owner{}; - /** - * We have two state variables. This could be streamlined to one variable in the future but - * we use two for clarity. - */ - bool has_work{false}; - bool can_work{true}; - - /** - * We lock using a mutex. - */ - std::mutex locking_mutex{}; - std::condition_variable cond_var{}; - - friend class document_stream; -}; -#endif // SIMDJSON_THREADS_ENABLED - -/** - * A forward-only stream of documents. - * - * Produced by parser::iterate_many. - * - */ -class document_stream { -public: - /** - * Construct an uninitialized document_stream. - * - * ```c++ - * document_stream docs; - * auto error = parser.iterate_many(json).get(docs); - * ``` - */ - simdjson_really_inline document_stream() noexcept; - /** Move one document_stream to another. */ - simdjson_really_inline document_stream(document_stream &&other) noexcept = default; - /** Move one document_stream to another. */ - simdjson_really_inline document_stream &operator=(document_stream &&other) noexcept = default; - - simdjson_really_inline ~document_stream() noexcept; - - /** - * Returns the input size in bytes. - */ - inline size_t size_in_bytes() const noexcept; - - /** - * After iterating through the stream, this method - * returns the number of bytes that were not parsed at the end - * of the stream. 
If truncated_bytes() differs from zero, - * then the input was truncated maybe because incomplete JSON - * documents were found at the end of the stream. You - * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). - * - * You should only call truncated_bytes() after streaming through all - * documents, like so: - * - * document_stream stream = parser.iterate_many(json,window); - * for(auto & doc : stream) { - * // do something with doc - * } - * size_t truncated = stream.truncated_bytes(); - * - */ - inline size_t truncated_bytes() const noexcept; - - class iterator { - public: - using value_type = simdjson_result; - using reference = value_type; - - using difference_type = std::ptrdiff_t; - - using iterator_category = std::input_iterator_tag; - - /** - * Default constructor. - */ - simdjson_really_inline iterator() noexcept; - /** - * Get the current document (or error). - */ - simdjson_really_inline simdjson_result operator*() noexcept; - /** - * Advance to the next document (prefix). - */ - inline iterator& operator++() noexcept; - /** - * Check if we're at the end yet. - * @param other the end iterator to compare to. - */ - simdjson_really_inline bool operator!=(const iterator &other) const noexcept; - /** - * @private - * - * Gives the current index in the input document in bytes. - * - * document_stream stream = parser.parse_many(json,window); - * for(auto i = stream.begin(); i != stream.end(); ++i) { - * auto doc = *i; - * size_t index = i.current_index(); - * } - * - * This function (current_index()) is experimental and the usage - * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. - */ - simdjson_really_inline size_t current_index() const noexcept; - - /** - * @private - * - * Gives a view of the current document at the current position. 
- * - * document_stream stream = parser.iterate_many(json,window); - * for(auto i = stream.begin(); i != stream.end(); ++i) { - * std::string_view v = i.source(); - * } - * - * The returned string_view instance is simply a map to the (unparsed) - * source string: it may thus include white-space characters and all manner - * of padding. - * - * This function (source()) is experimental and the usage - * may change in future versions of simdjson: we find the API somewhat - * awkward and we would like to offer something friendlier. - * - */ - simdjson_really_inline std::string_view source() const noexcept; - - /** - * Returns error of the stream (if any). - */ - inline error_code error() const noexcept; - - private: - simdjson_really_inline iterator(document_stream *s, bool finished) noexcept; - /** The document_stream we're iterating through. */ - document_stream* stream; - /** Whether we're finished or not. */ - bool finished; - - friend class document; - friend class document_stream; - friend class json_iterator; - }; - - /** - * Start iterating the documents in the stream. - */ - simdjson_really_inline iterator begin() noexcept; - /** - * The end of the stream, for iterator comparison purposes. - */ - simdjson_really_inline iterator end() noexcept; - -private: - - document_stream &operator=(const document_stream &) = delete; // Disallow copying - document_stream(const document_stream &other) = delete; // Disallow copying - - /** - * Construct a document_stream. Does not allocate or parse anything until the iterator is - * used. 
- * - * @param parser is a reference to the parser instance used to generate this document_stream - * @param buf is the raw byte buffer we need to process - * @param len is the length of the raw byte buffer in bytes - * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) - */ - simdjson_really_inline document_stream( - ondemand::parser &parser, - const uint8_t *buf, - size_t len, - size_t batch_size - ) noexcept; - - /** - * Parse the first document in the buffer. Used by begin(), to handle allocation and - * initialization. - */ - inline void start() noexcept; - - /** - * Parse the next document found in the buffer previously given to document_stream. - * - * The content should be a valid JSON document encoded as UTF-8. If there is a - * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are - * discouraged. - * - * You do NOT need to pre-allocate a parser. This function takes care of - * pre-allocating a capacity defined by the batch_size defined when creating the - * document_stream object. - * - * The function returns simdjson::EMPTY if there is no more data to be parsed. - * - * The function returns simdjson::SUCCESS (as integer = 0) in case of success - * and indicates that the buffer has successfully been parsed to the end. - * Every document it contained has been parsed without error. - * - * The function returns an error code from simdjson/simdjson.h in case of failure - * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; - * the simdjson::error_message function converts these error codes into a string). - * - * You can also check validity by calling parser.is_valid(). The same parser can - * and should be reused for the other documents in the buffer. - */ - inline void next() noexcept; - - /** Move the json_iterator of the document to the location of the next document in the stream. 
*/ - inline void next_document() noexcept; - - /** Get the next document index. */ - inline size_t next_batch_start() const noexcept; - - /** Pass the next batch through stage 1 with the given parser. */ - inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; - - // Fields - ondemand::parser *parser; - const uint8_t *buf; - size_t len; - size_t batch_size; - /** - * We are going to use just one document instance. The document owns - * the json_iterator. It implies that we only ever pass a reference - * to the document to the users. - */ - document doc{}; - /** The error (or lack thereof) from the current document. */ - error_code error; - size_t batch_start{0}; - size_t doc_index{}; - - #ifdef SIMDJSON_THREADS_ENABLED - /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ - bool use_thread; - - inline void load_from_stage1_thread() noexcept; - - /** Start a thread to run stage 1 on the next batch. */ - inline void start_stage1_thread() noexcept; - - /** Wait for the stage 1 thread to finish and capture the results. */ - inline void finish_stage1_thread() noexcept; - - /** The error returned from the stage 1 thread. */ - error_code stage1_thread_error{UNINITIALIZED}; - /** The thread used to run stage 1 against the next batch in the background. */ - std::unique_ptr worker{new(std::nothrow) stage1_worker()}; - /** - * The parser used to run stage 1 in the background. Will be swapped - * with the regular parser when finished. 
- */ - ondemand::parser stage1_thread_parser{}; - - friend struct stage1_worker; - #endif // SIMDJSON_THREADS_ENABLED - - friend class parser; - friend class document; - friend class json_iterator; - friend struct simdjson_result; - friend struct internal::simdjson_result_base; -}; // document_stream - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { -template<> -struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { -public: - simdjson_really_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private - simdjson_really_inline simdjson_result(error_code error) noexcept; ///< @private - simdjson_really_inline simdjson_result() noexcept = default; -}; - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/document_stream.h */ -/* begin file include/simdjson/generic/ondemand/serialization.h */ - -namespace simdjson { -/** - * Create a string-view instance out of a document instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. - */ -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; -/** - * Create a string-view instance out of a value instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. The value must - * not have been accessed previously. - */ -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; -/** - * Create a string-view instance out of an object instance. The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. - */ -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; -/** - * Create a string-view instance out of an array instance. 
The string-view instance - * contains JSON text that is suitable to be parsed as JSON again. - */ -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -inline simdjson_result to_json_string(simdjson_result x); -} // namespace simdjson - -/** - * We want to support argument-dependent lookup (ADL). - * Hence we should define operator<< in the namespace - * where the argument (here value, object, etc.) resides. - * Credit: @madhur4127 - * See https://github.com/simdjson/simdjson/issues/1768 - */ -namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The element. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); -#endif -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); -#endif -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
- */ -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); -#endif -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); -#endif -/** - * Print JSON to an output stream. - * - * @param out The output stream. - * @param value The object. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. - */ -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value); -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); -#endif -}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand -/* end file include/simdjson/generic/ondemand/serialization.h */ -/* end file include/simdjson/generic/ondemand.h */ - -// Inline definitions -/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { - -// -// internal::implementation_simdjson_result_base inline implementation -// - -template -simdjson_really_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { - error = this->second; - if (!error) { - value = std::forward>(*this).first; - } -} - -template -simdjson_warn_unused simdjson_really_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { - error_code error; - std::forward>(*this).tie(value, error); - return error; -} - -template -simdjson_really_inline error_code implementation_simdjson_result_base::error() const noexcept { - return this->second; -} - -#if SIMDJSON_EXCEPTIONS - -template 
-simdjson_really_inline T& implementation_simdjson_result_base::value() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return this->first; -} - -template -simdjson_really_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -template -simdjson_really_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return std::forward(this->first); -} - -template -simdjson_really_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { - return std::forward>(*this).take_value(); -} - -#endif // SIMDJSON_EXCEPTIONS - -template -simdjson_really_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { - return this->first; -} - -template -simdjson_really_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { - return this->first; -} - -template -simdjson_really_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { - return std::forward(this->first); -} - -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept - : first{std::forward(value)}, second{error} {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept - : implementation_simdjson_result_base(T{}, error) {} -template -simdjson_really_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept - : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} - -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson -/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ -/* begin file include/simdjson/generic/ondemand-inl.h */ -/* begin file include/simdjson/generic/ondemand/json_type-inl.h */ 
-namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { - switch (type) { - case json_type::array: out << "array"; break; - case json_type::object: out << "object"; break; - case json_type::number: out << "number"; break; - case json_type::string: out << "string"; break; - case json_type::boolean: out << "boolean"; break; - case json_type::null: out << "null"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} - -inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { - switch (type) { - case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; - case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; - case number_type::floating_point_number: out << "floating-point number (binary64)"; break; - default: SIMDJSON_UNREACHABLE(); - } - return out; -} -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { - return out << type.value(); -} -#endif - - - -simdjson_really_inline number_type number::get_number_type() const noexcept { - return type; -} - -simdjson_really_inline bool number::is_uint64() const noexcept { - return get_number_type() == number_type::unsigned_integer; -} - -simdjson_really_inline uint64_t number::get_uint64() const noexcept { - return payload.unsigned_integer; -} - -simdjson_really_inline number::operator uint64_t() const noexcept { - return get_uint64(); -} - - -simdjson_really_inline bool number::is_int64() const noexcept { - return get_number_type() == number_type::signed_integer; -} - -simdjson_really_inline int64_t number::get_int64() const noexcept { - return payload.signed_integer; -} - -simdjson_really_inline number::operator int64_t() const noexcept { - return get_int64(); -} - -simdjson_really_inline bool 
number::is_double() const noexcept { - return get_number_type() == number_type::floating_point_number; -} - -simdjson_really_inline double number::get_double() const noexcept { - return payload.floating_point_number; -} - -simdjson_really_inline number::operator double() const noexcept { - return get_double(); -} - -simdjson_really_inline double number::as_double() const noexcept { - if(is_double()) { - return payload.floating_point_number; - } - if(is_int64()) { - return double(payload.signed_integer); - } - return double(payload.unsigned_integer); -} - -simdjson_really_inline void number::append_s64(int64_t value) noexcept { - payload.signed_integer = value; - type = number_type::signed_integer; -} - -simdjson_really_inline void number::append_u64(uint64_t value) noexcept { - payload.unsigned_integer = value; - type = number_type::unsigned_integer; -} - -simdjson_really_inline void number::append_double(double value) noexcept { - payload.floating_point_number = value; - type = number_type::floating_point_number; -} - -simdjson_really_inline void number::skip_double() noexcept { - type = number_type::floating_point_number; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_type-inl.h */ -/* begin file include/simdjson/generic/ondemand/logger-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { -namespace logger { - -static constexpr const char * DASHES = 
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; -static constexpr const int LOG_EVENT_LEN = 20; -static constexpr const int LOG_BUFFER_LEN = 30; -static constexpr const int LOG_SMALL_BUFFER_LEN = 10; -static int log_depth = 0; // Not threadsafe. Log only. - -// Helper to turn unprintable or newline characters into spaces -static inline char printable_char(char c) { - if (c >= 0x20) { - return c; - } else { - return ' '; - } -} - -inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} - -inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "", type, detail); -} -inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, "", type, detail, delta, depth_delta); -} - -inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { - log_line(iter, index, depth, "+", type, detail); - if (LOG_ENABLED) { log_depth++; } -} -inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_line(iter, "+", type, "", delta, depth_delta); - if (LOG_ENABLED) { log_depth++; } -} - -inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - if (LOG_ENABLED) { log_depth--; } - log_line(iter, "-", type, "", delta, depth_delta); -} - -inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept 
{ - log_line(iter, "ERROR: ", error, detail, delta, depth_delta); -} -inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { - log_line(iter, index, depth, "ERROR: ", error, detail); -} - -inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_event(iter.json_iter(), type, detail, delta, depth_delta); -} - -inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { - log_value(iter.json_iter(), type, detail, delta, depth_delta); -} - -inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_start_value(iter.json_iter(), type, delta, depth_delta); -} - -inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { - log_end_value(iter.json_iter(), type, delta, depth_delta); -} - -inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { - log_error(iter.json_iter(), error, detail, delta, depth_delta); -} - -inline void log_headers() noexcept { - if (LOG_ENABLED) { - // Technically a static variable is not thread-safe, but if you are using threads - // and logging... well... - static bool displayed_hint{false}; - log_depth = 0; - printf("\n"); - if(!displayed_hint) { - // We only print this helpful header once. 
- printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); - printf("# +array says 'this is where we were when we discovered the start array'\n"); - printf("# -array says 'this is where we were when we ended the array'\n"); - printf("# skip says 'this is a structural or value I am skipping'\n"); - printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); - printf("#\n"); - printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); - printf("# in addition to the depth being displayed.\n"); - printf("#\n"); - printf("# Every token in the document has a single depth determined by the tokens before it,\n"); - printf("# and is not affected by what the token actually is.\n"); - printf("#\n"); - printf("# Not all structural elements are presented as tokens in the logs.\n"); - printf("#\n"); - printf("# We never give control to the user within an empty array or an empty object.\n"); - printf("#\n"); - printf("# Inside an array, having a depth greater than the array's depth means that\n"); - printf("# we are pointing inside a value.\n"); - printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); - printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); - displayed_hint = true; - } - printf("\n"); - printf("| %-*s ", LOG_EVENT_LEN, "Event"); - printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); - printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); - // printf("| %-*s ", 5, "Next#"); - printf("| %-*s ", 5, "Depth"); - printf("| Detail "); - printf("|\n"); - - printf("|%.*s", LOG_EVENT_LEN+2, DASHES); - printf("|%.*s", LOG_BUFFER_LEN+2, DASHES); - printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES); - // printf("|%.*s", 5+2, DASHES); - printf("|%.*s", 5+2, DASHES); - printf("|--------"); - printf("|\n"); - fflush(stdout); - } -} - -inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, 
std::string_view detail, int delta, int depth_delta) noexcept { - log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); -} -inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { - if (LOG_ENABLED) { - const int indent = depth*2; - const auto buf = iter.token.buf; - printf("| %*s%s%-*s ", - indent, "", - title_prefix, - LOG_EVENT_LEN - indent - int(strlen(title_prefix)), title - ); - { - // Print the current structural. - printf("| "); - auto current_structural = &buf[*index]; - for (int i=0;i(buf); } -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(uint8_t *&dst) const noexcept { - uint8_t *end = stringparsing::parse_string(buf, dst); - if (!end) { return STRING_ERROR; } - std::string_view result(reinterpret_cast(dst), end-dst); - dst = end; - return result; -} - -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;pos < target.size() && target[pos] != '\\';pos++) {} - // slow path may begin. - bool escaping{false}; - for(;pos < target.size();pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; -} - -simdjson_really_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { - size_t pos{0}; - // if the content has no escape character, just scan through it quickly! - for(;target[pos] && target[pos] != '\\';pos++) {} - // slow path may begin. 
- bool escaping{false}; - for(;target[pos];pos++) { - if((target[pos] == '"') && !escaping) { - return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - return true; -} - - -simdjson_really_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { - // If we are going to call memcmp, then we must know something about the length of the raw_json_string. - return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); -} - -simdjson_really_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - if(target.size() <= SIMDJSON_PADDING) { - return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); - } - const char * r{raw()}; - size_t pos{0}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; -} - -simdjson_really_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;pos < target.size();pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. 
- return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; -} - - -simdjson_really_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { - // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - } - if(r[pos] != '"') { return false; } - return true; -} - -simdjson_really_inline bool raw_json_string::is_equal(const char* target) const noexcept { - // Assumptions: does not contain unescaped quote characters, and - // the raw content is quote terminated within a valid JSON string. - const char * r{raw()}; - size_t pos{0}; - bool escaping{false}; - for(;target[pos];pos++) { - if(r[pos] != target[pos]) { return false; } - // if target is a compile-time constant and it is free from - // quotes, then the next part could get optimized away through - // inlining. - if((target[pos] == '"') && !escaping) { - // We have reached the end of the raw_json_string but - // the target is not done. 
- return false; - } else if(target[pos] == '\\') { - escaping = !escaping; - } else { - escaping = false; - } - } - if(r[pos] != '"') { return false; } - return true; -} - -simdjson_unused simdjson_really_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { - return a.unsafe_is_equal(c); -} - -simdjson_unused simdjson_really_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { - return a == c; -} - -simdjson_unused simdjson_really_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { - return !(a == c); -} - -simdjson_unused simdjson_really_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { - return !(a == c); -} - - -simdjson_really_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { - return unescape(iter.string_buf_loc()); -} - - -simdjson_unused simdjson_really_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { - bool in_escape = false; - const char *s = str.raw(); - while (true) { - switch (*s) { - case '\\': in_escape = !in_escape; break; - case '"': if (in_escape) { in_escape = false; } else { return out; } break; - default: if (in_escape) { in_escape = false; } - } - out << *s; - s++; - } -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result simdjson_result::raw() const noexcept { - if (error()) { return error(); } - return first.raw(); -} -simdjson_really_inline simdjson_warn_unused simdjson_result 
simdjson_result::unescape(uint8_t *&dst) const noexcept { - if (error()) { return error(); } - return first.unescape(dst); -} -simdjson_really_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { - if (error()) { return error(); } - return first.unescape(iter); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ -/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline token_iterator::token_iterator( - const uint8_t *_buf, - token_position position -) noexcept : buf{_buf}, _position{position} -{ -} - -simdjson_really_inline uint32_t token_iterator::current_offset() const noexcept { - return *(_position); -} - - -simdjson_really_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { - return &buf[*(_position++)]; -} - -simdjson_really_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { - return &buf[*position]; -} -simdjson_really_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { - return *position; -} -simdjson_really_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { - return *(position+1) - *position; -} - -simdjson_really_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { - return &buf[*(_position+delta)]; -} -simdjson_really_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { - return *(_position+delta); -} -simdjson_really_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { - return *(_position+delta+1) - *(_position+delta); -} - -simdjson_really_inline token_position token_iterator::position() const noexcept { - return _position; -} -simdjson_really_inline void 
token_iterator::set_position(token_position target_position) noexcept { - _position = target_position; -} - -simdjson_really_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { - return _position == other._position; -} -simdjson_really_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { - return _position != other._position; -} -simdjson_really_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { - return _position > other._position; -} -simdjson_really_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { - return _position >= other._position; -} -simdjson_really_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { - return _position < other._position; -} -simdjson_really_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { - return _position <= other._position; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline json_iterator::json_iterator(json_iterator &&other) noexcept - : token(std::forward(other.token)), - parser{other.parser}, - _string_buf_loc{other._string_buf_loc}, - error{other.error}, - _depth{other._depth}, - _root{other._root}, - _streaming{other._streaming} -{ - other.parser = 
nullptr; -} -simdjson_really_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { - token = other.token; - parser = other.parser; - _string_buf_loc = other._string_buf_loc; - error = other.error; - _depth = other._depth; - _root = other._root; - _streaming = other._streaming; - other.parser = nullptr; - return *this; -} - -simdjson_really_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept - : token(buf, &_parser->implementation->structural_indexes[0]), - parser{_parser}, - _string_buf_loc{parser->string_buf.get()}, - _depth{1}, - _root{parser->implementation->structural_indexes.get()}, - _streaming{false} - -{ - logger::log_headers(); -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif -} - -inline void json_iterator::rewind() noexcept { - token.set_position( root_position() ); - logger::log_headers(); // We start again - _string_buf_loc = parser->string_buf.get(); - _depth = 1; -} - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and parent_depth, which is a desired effect. The warning does not show up if the -// skip_child() function is not marked inline). -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_warn_unused simdjson_really_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { - if (depth() <= parent_depth) { return SUCCESS; } - switch (*return_current_and_advance()) { - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." 
- // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - - // For the first open array/object in a value, we've already incremented depth, so keep it the same - // We never stop at colon, but if we did, it wouldn't affect depth - case '[': case '{': case ':': - logger::log_start_value(*this, "skip"); - break; - // If there is a comma, we have just finished a value in an array/object, and need to get back in - case ',': - logger::log_value(*this, "skip"); - break; - // ] or } means we just finished a value and need to jump out of the array/object - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } -#if SIMDJSON_CHECK_EOF - // If there are no more tokens, the parent is incomplete. - if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - break; - case '"': - if(*peek() == ':') { - // We are at a key!!! - // This might happen if you just started an object and you skip it immediately. - // Performance note: it would be nice to get rid of this check as it is somewhat - // expensive. - // https://github.com/simdjson/simdjson/issues/1742 - logger::log_value(*this, "key"); - return_current_and_advance(); // eat up the ':' - break; // important!!! - } - simdjson_fallthrough; - // Anything else must be a scalar value - default: - // For the first scalar, we will have incremented depth already, so we decrement it here. 
- logger::log_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - } - - // Now that we've considered the first value, we only increment/decrement for arrays/objects - while (position() < end_position()) { - switch (*return_current_and_advance()) { - case '[': case '{': - logger::log_start_value(*this, "skip"); - _depth++; - break; - // TODO consider whether matching braces is a requirement: if non-matching braces indicates - // *missing* braces, then future lookups are not in the object/arrays they think they are, - // violating the rule "validate enough structure that the user can be confident they are - // looking at the right values." - // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth - case ']': case '}': - logger::log_end_value(*this, "skip"); - _depth--; - if (depth() <= parent_depth) { return SUCCESS; } - break; - default: - logger::log_value(*this, "skip", ""); - break; - } - } - - return report_error(TAPE_ERROR, "not enough close braces"); -} - -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_really_inline bool json_iterator::at_root() const noexcept { - return position() == root_position(); -} - -simdjson_really_inline bool json_iterator::streaming() const noexcept { - return _streaming; -} - -simdjson_really_inline token_position json_iterator::root_position() const noexcept { - return _root; -} - -simdjson_really_inline void json_iterator::assert_at_document_depth() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -} - -simdjson_really_inline void json_iterator::assert_at_root() const noexcept { - SIMDJSON_ASSUME( _depth == 1 ); -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument - // has side effects that will be discarded. 
- SIMDJSON_ASSUME( token.position() == _root ); -#endif -} - -simdjson_really_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { - assert_valid_position(token._position + required_tokens - 1); -} - -simdjson_really_inline void json_iterator::assert_valid_position(token_position position) const noexcept { -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); - SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); -#endif -} - -simdjson_really_inline bool json_iterator::at_end() const noexcept { - return position() == end_position(); -} -simdjson_really_inline token_position json_iterator::end_position() const noexcept { - uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - return &parser->implementation->structural_indexes[n_structural_indexes]; -} - -inline std::string json_iterator::to_string() const noexcept { - if( !is_alive() ) { return "dead json_iterator instance"; } - const char * current_structural = reinterpret_cast(token.peek()); - return std::string("json_iterator [ depth : ") + std::to_string(_depth) - + std::string(", structural : '") + std::string(current_structural,1) - + std::string("', offset : ") + std::to_string(token.current_offset()) - + std::string("', error : ") + error_message(error) - + std::string(" ]"); -} - -inline simdjson_result json_iterator::current_location() noexcept { - if (!is_alive()) { // Unrecoverable error - if (!at_root()) { - return reinterpret_cast(token.peek(-1)); - } else { - return reinterpret_cast(token.peek()); - } - } - if (at_end()) { - return OUT_OF_BOUNDS; - } - return reinterpret_cast(token.peek()); -} - -simdjson_really_inline bool json_iterator::is_alive() const noexcept { - return parser; -} - -simdjson_really_inline void json_iterator::abandon() noexcept { - parser = nullptr; - _depth = 0; -} - 
-simdjson_really_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(); -#endif // SIMDJSON_CHECK_EOF - return token.return_current_and_advance(); -} - -simdjson_really_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { - // deliberately done without safety guard: - return token.peek(0); -} - -simdjson_really_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // SIMDJSON_CHECK_EOF - return token.peek(delta); -} - -simdjson_really_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_more_tokens(delta+1); -#endif // #if SIMDJSON_CHECK_EOF - return token.peek_length(delta); -} - -simdjson_really_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { - // todo: currently we require end-of-string buffering, but the following - // assert_valid_position should be turned on if/when we lift that condition. - // assert_valid_position(position); - // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF - // is ON by default, we have no choice but to disable it for real with a comment. - return token.peek(position); -} - -simdjson_really_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { -#if SIMDJSON_CHECK_EOF - assert_valid_position(position); -#endif // SIMDJSON_CHECK_EOF - return token.peek_length(position); -} - -simdjson_really_inline token_position json_iterator::last_position() const noexcept { - // The following line fails under some compilers... - // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); - // since it has side-effects. 
- uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; - SIMDJSON_ASSUME(n_structural_indexes > 0); - return &parser->implementation->structural_indexes[n_structural_indexes - 1]; -} -simdjson_really_inline const uint8_t *json_iterator::peek_last() const noexcept { - return token.peek(last_position()); -} - -simdjson_really_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { - SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); - SIMDJSON_ASSUME(_depth == parent_depth + 1); - _depth = parent_depth; -} - -simdjson_really_inline void json_iterator::descend_to(depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); - _depth = child_depth; -} - -simdjson_really_inline depth_t json_iterator::depth() const noexcept { - return _depth; -} - -simdjson_really_inline uint8_t *&json_iterator::string_buf_loc() noexcept { - return _string_buf_loc; -} - -simdjson_really_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); - logger::log_error(*this, message); - error = _error; - return error; -} - -simdjson_really_inline token_position json_iterator::position() const noexcept { - return token.position(); -} - -simdjson_really_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { - SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); - SIMDJSON_ASSUME(_depth == child_depth - 1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS -#ifndef SIMDJSON_CLANG_VISUAL_STUDIO - SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); -#endif -#endif - token.set_position(position); - _depth = child_depth; -} - -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - -simdjson_really_inline token_position json_iterator::start_position(depth_t depth) const 
noexcept { - return parser->start_positions[depth]; -} - -simdjson_really_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { - parser->start_positions[depth] = position; -} - -#endif - - -simdjson_really_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { - SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); - logger::log_error(*this, message); - return _error; -} - -template -simdjson_warn_unused simdjson_really_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { - // Let us guard against silly cases: - if((N < max_len) || (N == 0)) { return false; } - // Truncate whitespace to fit the buffer. - if (max_len > N-1) { - // if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } - max_len = N-1; - } - - // Copy to the buffer. - std::memcpy(tmpbuf, json, max_len); - tmpbuf[max_len] = ' '; - return true; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/value_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline value_iterator::value_iterator( - json_iterator *json_iter, - depth_t depth, - token_position start_position -) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} -{ -} - -simdjson_warn_unused simdjson_really_inline 
simdjson_result value_iterator::start_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_object(); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_object() noexcept { - SIMDJSON_TRY( start_container('{', "Not an object", "object") ); - return started_root_object(); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_object() noexcept { - assert_at_container_start(); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - if (*_json_iter->peek() == '}') { - logger::log_value(*_json_iter, "empty object"); - _json_iter->return_current_and_advance(); - end_container(); - return false; - } - return true; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_object() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if (! 
_json_iter->streaming() && (*_json_iter->peek_last() != '}')) { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); - } - return started_object(); -} - -simdjson_warn_unused simdjson_really_inline error_code value_iterator::end_container() noexcept { -#if SIMDJSON_CHECK_EOF - if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } - // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } -#endif // SIMDJSON_CHECK_EOF - _json_iter->ascend_to(depth()-1); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_field() noexcept { - assert_at_next(); - - // It's illegal to call this unless there are more tokens: anything that ends in } or ] is - // obligated to verify there are more tokens if they are not the top level. - switch (*_json_iter->return_current_and_advance()) { - case '}': - logger::log_end_value(*_json_iter, "object"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between object fields"); - } -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { - error_code error; - bool has_value; - // - // Initially, the object can be in one of a few different places: - // - // 1. The start of the object, at the first field: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - if (at_first_field()) { - has_value = true; - - // - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. 
- // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - return false; - - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - if ((error = skip_child() )) { abandon(); return error; } - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif - } - while (has_value) { - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). 
- //if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } - - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); // Skip the value entirely - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } - } - - // If the loop ended, we're out of fields to look at. - return false; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { - /** - * When find_field_unordered_raw is called, we can either be pointing at the - * first key, pointing outside (at the closing brace) or if a key was matched - * we can be either pointing right afterthe ':' right before the value (that we need skip), - * or we may have consumed the value and we might be at a comma or at the - * final brace (ready for a call to has_next_field()). - */ - error_code error; - bool has_value; - - // First, we scan from that point to the end. - // If we don't find a match, we may loop back around, and scan from the beginning to that point. 
- token_position search_start = _json_iter->position(); - - // We want to know whether we need to go back to the beginning. - bool at_first = at_first_field(); - /////////////// - // Initially, the object can be in one of a few different places: - // - // 1. At the first key: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2, index 1) - // ``` - // - if (at_first) { - has_value = true; - - // 2. When a previous search did not yield a value or the object is empty: - // - // ``` - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // { } - // ^ (depth 0, index 2) - // ``` - // - } else if (!is_open()) { - -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - // If we're past the end of the object, we're being iterated out of order. - // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, - // this object iterator will blithely scan that object for fields. - if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } -#endif - SIMDJSON_TRY(reset_object().get(has_value)); - at_first = true; - // 3. When a previous search found a field or an iterator yielded a value: - // - // ``` - // // When a field was not fully consumed (or not even touched at all) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 2) - // // When a field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // When the last field was fully consumed - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // ``` - // - } else { - // If someone queried a key but they not did access the value, then we are left pointing - // at the ':' and we need to move forward through the value... If the value was - // processed then skip_child() does not move the iterator (but may adjust the depth). 
- if ((error = skip_child() )) { abandon(); return error; } - search_start = _json_iter->position(); - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } -#endif - } - - // After initial processing, we will be in one of two states: - // - // ``` - // // At the beginning of a field - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 1) - // // At the end of the object - // { "a": [ 1, 2 ], "b": [ 3, 4 ] } - // ^ (depth 0) - // ``` - // - // Next, we find a match starting from the current position. - while (has_value) { - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field - - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. - // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - if ((error = field_key().get(actual_key) )) { abandon(); return error; }; - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - if ((error = field_value() )) { abandon(); return error; } - - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). 
- if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } - - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. - if ((error = has_next_field().get(has_value) )) { abandon(); return error; } - } - // Performance note: it maybe wasteful to rewind to the beginning when there might be - // no other query following. Indeed, it would require reskipping the whole object. - // Instead, you can just stay where you are. If there is a new query, there is always time - // to rewind. - if(at_first) { return false; } - - // If we reach the end without finding a match, search the rest of the fields starting at the - // beginning of the object. - // (We have already run through the object before, so we've already validated its structure. We - // don't check errors in this bit.) - SIMDJSON_TRY(reset_object().get(has_value)); - while (true) { - SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field - - // Get the key and colon, stopping at the value. - raw_json_string actual_key; - // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes - // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. 
- // field_key() advances the pointer and checks that '"' is found (corresponding to a key). - // The depth is left unchanged by field_key(). - error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); - // field_value() will advance and check that we find a ':' separating the - // key and the value. It will also increment the depth by one. - error = field_value(); SIMDJSON_ASSUME(!error); - - // If it matches, stop and return - // We could do it this way if we wanted to allow arbitrary - // key content (including escaped quotes). - // if (actual_key.unsafe_is_equal(max_key_length, key)) { - // Instead we do the following which may trigger buffer overruns if the - // user provides an adversarial key (containing a well placed unescaped quote - // character and being longer than the number of bytes remaining in the JSON - // input). - if (actual_key.unsafe_is_equal(key)) { - logger::log_event(*this, "match", key, -2); - // If we return here, then we return while pointing at the ':' that we just checked. - return true; - } - - // No match: skip the value and see if , or } is next - logger::log_event(*this, "no match", key, -2); - // The call to skip_child is meant to skip over the value corresponding to the key. - // After skip_child(), we are right before the next comma (',') or the final brace ('}'). - SIMDJSON_TRY( skip_child() ); - // If we reached the end of the key-value pair we started from, then we know - // that the key is not there so we return false. We are either right before - // the next comma or the final brace. - if(_json_iter->position() == search_start) { return false; } - // The has_next_field() advances the pointer and check that either ',' or '}' is found. - // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, - // then we are in error and we abort. 
- error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); - // If we make the mistake of exiting here, then we could be left pointing at a key - // in the middle of an object. That's not an allowable state. - } - // If the loop ended, we're out of fields to look at. The program should - // never reach this point. - return false; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::field_key() noexcept { - assert_at_next(); - - const uint8_t *key = _json_iter->return_current_and_advance(); - if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } - return raw_json_string(key); -} - -simdjson_warn_unused simdjson_really_inline error_code value_iterator::field_value() noexcept { - assert_at_next(); - - if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } - _json_iter->descend_to(depth()+1); - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_array(); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::start_root_array() noexcept { - SIMDJSON_TRY( start_container('[', "Not an array", "array") ); - return started_root_array(); -} - -inline std::string value_iterator::to_string() const noexcept { - auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); - if(_json_iter != nullptr) { answer += _json_iter->to_string(); } - answer += std::string(" ]"); - return answer; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_array() noexcept { - assert_at_container_start(); - if (*_json_iter->peek() == ']') { - logger::log_value(*_json_iter, "empty array"); - _json_iter->return_current_and_advance(); - SIMDJSON_TRY( end_container() ); - return false; - } - 
_json_iter->descend_to(depth()+1); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - _json_iter->set_start_position(_depth, start_position()); -#endif - return true; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::started_root_array() noexcept { - // When in streaming mode, we cannot expect peek_last() to be the last structural element of the - // current document. It only works in the normal mode where we have indexed a single document. - // Note that adding a check for 'streaming' is not expensive since we only have at most - // one root element. - if ( ! _json_iter->streaming() && (*_json_iter->peek_last() != ']')) { - _json_iter->abandon(); - return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); - } - return started_array(); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::has_next_element() noexcept { - assert_at_next(); - - logger::log_event(*this, "has_next_element"); - switch (*_json_iter->return_current_and_advance()) { - case ']': - logger::log_end_value(*_json_iter, "array"); - SIMDJSON_TRY( end_container() ); - return false; - case ',': - _json_iter->descend_to(depth()+1); - return true; - default: - return report_error(TAPE_ERROR, "Missing comma between array elements"); - } -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { - auto not_true = atomparsing::str4ncmp(json, "true"); - auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); - bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 
5 : 4]); - if (error) { return incorrect_type_error("Not a boolean"); } - return simdjson_result(!not_true); -} -simdjson_really_inline bool value_iterator::parse_null(const uint8_t *json) const noexcept { - return !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_string() noexcept { - return get_raw_json_string().unescape(_json_iter->string_buf_loc()); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_raw_json_string() noexcept { - auto json = peek_scalar("string"); - if (*json != '"') { return incorrect_type_error("Not a string"); } - advance_scalar("string"); - return raw_json_string(json+1); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64() noexcept { - auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { - auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64() noexcept { - auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_int64_in_string() noexcept { - auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); - if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double() noexcept { - auto result 
= numberparsing::parse_double(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_double_in_string() noexcept { - auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); - if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_bool() noexcept { - auto result = parse_bool(peek_non_root_scalar("bool")); - if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } - return result; -} -simdjson_really_inline bool value_iterator::is_null() noexcept { - auto result = parse_null(peek_non_root_scalar("null")); - if(result) { advance_non_root_scalar("null"); } - return result; -} -simdjson_really_inline bool value_iterator::is_negative() noexcept { - return numberparsing::is_negative(peek_non_root_scalar("numbersign")); -} -simdjson_really_inline bool value_iterator::is_root_negative() noexcept { - return numberparsing::is_negative(peek_root_scalar("numbersign")); -} -simdjson_really_inline simdjson_result value_iterator::is_integer() noexcept { - return numberparsing::is_integer(peek_non_root_scalar("integer")); -} -simdjson_really_inline simdjson_result value_iterator::get_number_type() noexcept { - return numberparsing::get_number_type(peek_non_root_scalar("integer")); -} -simdjson_really_inline simdjson_result value_iterator::get_number() noexcept { - number num; - error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); - if(error) { return error; } - return num; -} - -simdjson_really_inline simdjson_result value_iterator::is_root_integer() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("is_root_integer"); - uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if 
(!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - return false; // if there are more than 20 characters, it cannot be represented as an integer. - } - return numberparsing::is_integer(tmpbuf); -} - -simdjson_really_inline simdjson_result value_iterator::get_root_number_type() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - return numberparsing::get_number_type(tmpbuf); -} -simdjson_really_inline simdjson_result value_iterator::get_root_number() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("number"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. 
- uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - number num; - error_code error = numberparsing::parse_number(tmpbuf, num); - if(error) { return error; } - advance_root_scalar("number"); - return num; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_string() noexcept { - return get_string(); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { - return get_raw_json_string(); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_unsigned(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("uint64"); - uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_unsigned_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("uint64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64() noexcept { - auto max_len = 
peek_start_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - - auto result = numberparsing::parse_integer(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("int64"); - uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); - return NUMBER_ERROR; - } - - auto result = numberparsing::parse_integer_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("int64"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. 
- uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_double(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("double"); } - return result; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("double"); - // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, - // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest - // number: -0.e-308. - uint8_t tmpbuf[1074+8+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { - logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); - return NUMBER_ERROR; - } - auto result = numberparsing::parse_double_in_string(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("double"); } - return result; -} -simdjson_warn_unused simdjson_really_inline simdjson_result value_iterator::get_root_bool() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1]; - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { advance_root_scalar("bool"); } - return result; -} -simdjson_really_inline bool value_iterator::is_root_null() noexcept { - auto max_len = peek_start_length(); - auto json = peek_root_scalar("null"); - bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && - (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); - if(result) { advance_root_scalar("null"); } - return result; -} - -simdjson_warn_unused simdjson_really_inline 
error_code value_iterator::skip_child() noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); - - return _json_iter->skip_child(depth()); -} - -simdjson_really_inline value_iterator value_iterator::child() const noexcept { - assert_at_child(); - return { _json_iter, depth()+1, _json_iter->token.position() }; -} - -// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller -// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is -// marked non-inline. -SIMDJSON_PUSH_DISABLE_WARNINGS -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_really_inline bool value_iterator::is_open() const noexcept { - return _json_iter->depth() >= depth(); -} -SIMDJSON_POP_DISABLE_WARNINGS - -simdjson_really_inline bool value_iterator::at_end() const noexcept { - return _json_iter->at_end(); -} - -simdjson_really_inline bool value_iterator::at_start() const noexcept { - return _json_iter->token.position() == start_position(); -} - -simdjson_really_inline bool value_iterator::at_first_field() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - return _json_iter->token.position() == start_position() + 1; -} - -simdjson_really_inline void value_iterator::abandon() noexcept { - _json_iter->abandon(); -} - -simdjson_warn_unused simdjson_really_inline depth_t value_iterator::depth() const noexcept { - return _depth; -} -simdjson_warn_unused simdjson_really_inline error_code value_iterator::error() const noexcept { - return _json_iter->error; -} -simdjson_warn_unused simdjson_really_inline uint8_t *&value_iterator::string_buf_loc() noexcept { - return _json_iter->string_buf_loc(); -} -simdjson_warn_unused simdjson_really_inline const json_iterator &value_iterator::json_iter() const noexcept { - return *_json_iter; -} -simdjson_warn_unused simdjson_really_inline json_iterator &value_iterator::json_iter() 
noexcept { - return *_json_iter; -} - -simdjson_really_inline const uint8_t *value_iterator::peek_start() const noexcept { - return _json_iter->peek(start_position()); -} -simdjson_really_inline uint32_t value_iterator::peek_start_length() const noexcept { - return _json_iter->peek_length(start_position()); -} - -simdjson_really_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return peek_start(); } - - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - return _json_iter->peek(); -} - -simdjson_really_inline void value_iterator::advance_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - if (!is_at_start()) { return; } - - // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. - assert_at_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} - -simdjson_really_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { - logger::log_start_value(*_json_iter, start_position(), depth(), type); - // If we're not at the position anymore, we don't want to advance the cursor. - const uint8_t *json; - if (!is_at_start()) { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - json = peek_start(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - } else { - assert_at_start(); - /** - * We should be prudent. Let us peek. If it is not the right type, we - * return an error. 
Only once we have determined that we have the right - * type are we allowed to advance! - */ - json = _json_iter->peek(); - if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } - _json_iter->return_current_and_advance(); - } - - - return SUCCESS; -} - - -simdjson_really_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } - - assert_at_root(); - return _json_iter->peek(); -} -simdjson_really_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return peek_start(); } - - assert_at_non_root_start(); - return _json_iter->peek(); -} - -simdjson_really_inline void value_iterator::advance_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - - assert_at_root(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} -simdjson_really_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { - logger::log_value(*_json_iter, start_position(), depth(), type); - if (!is_at_start()) { return; } - - assert_at_non_root_start(); - _json_iter->return_current_and_advance(); - _json_iter->ascend_to(depth()-1); -} - -simdjson_really_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { - logger::log_error(*_json_iter, start_position(), depth(), message); - return INCORRECT_TYPE; -} - -simdjson_really_inline bool value_iterator::is_at_start() const noexcept { - return position() == start_position(); -} - -simdjson_really_inline bool value_iterator::is_at_key() const noexcept { - // Keys are at the same depth as the object. 
- // Note here that we could be safer and check that we are within an object, - // but we do not. - return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; -} - -simdjson_really_inline bool value_iterator::is_at_iterator_start() const noexcept { - // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). - auto delta = position() - start_position(); - return delta == 1 || delta == 2; -} - -inline void value_iterator::assert_at_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} - -inline void value_iterator::assert_at_container_start() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} - -inline void value_iterator::assert_at_next() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth ); - SIMDJSON_ASSUME( _depth > 0 ); -} - -simdjson_really_inline void value_iterator::move_at_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position); -} - -simdjson_really_inline void value_iterator::move_at_container_start() noexcept { - _json_iter->_depth = _depth; - _json_iter->token.set_position(_start_position + 1); -} - -simdjson_really_inline simdjson_result value_iterator::reset_array() noexcept { - move_at_container_start(); - return started_array(); -} - -simdjson_really_inline simdjson_result value_iterator::reset_object() noexcept { - move_at_container_start(); - return started_object(); -} - -inline void value_iterator::assert_at_child() const noexcept { - SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); - SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); - SIMDJSON_ASSUME( _depth > 0 ); -} - -inline void 
value_iterator::assert_at_root() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth == 1 ); -} - -inline void value_iterator::assert_at_non_root_start() const noexcept { - assert_at_start(); - SIMDJSON_ASSUME( _depth > 1 ); -} - -inline void value_iterator::assert_is_valid() const noexcept { - SIMDJSON_ASSUME( _json_iter != nullptr ); -} - -simdjson_really_inline bool value_iterator::is_valid() const noexcept { - return _json_iter != nullptr; -} - -simdjson_really_inline simdjson_result value_iterator::type() const noexcept { - switch (*peek_start()) { - case '{': - return json_type::object; - case '[': - return json_type::array; - case '"': - return json_type::string; - case 'n': - return json_type::null; - case 't': case 'f': - return json_type::boolean; - case '-': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - return json_type::number; - default: - return TAPE_ERROR; - } -} - -simdjson_really_inline token_position value_iterator::start_position() const noexcept { - return _start_position; -} - -simdjson_really_inline token_position value_iterator::position() const noexcept { - return _json_iter->position(); -} - -simdjson_really_inline token_position value_iterator::end_position() const noexcept { - return _json_iter->end_position(); -} - -simdjson_really_inline token_position value_iterator::last_position() const noexcept { - return _json_iter->last_position(); -} - -simdjson_really_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { - return _json_iter->report_error(error, message); -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline 
simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} - -simdjson_really_inline simdjson_result array_iterator::operator*() noexcept { - if (iter.error()) { iter.abandon(); return iter.error(); } - return value(iter.child()); -} -simdjson_really_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { - return !(*this != other); -} -simdjson_really_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { - return iter.is_open(); -} -simdjson_really_inline array_iterator &array_iterator::operator++() noexcept { - error_code error; - // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. - // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
- if (( error = iter.error() )) { return *this; } - if (( error = iter.skip_child() )) { return *this; } - if (( error = iter.has_next_element().error() )) { return *this; } - return *this; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value -) noexcept - : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++(first); - return *this; -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -// -// object_iterator -// - -simdjson_really_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept - : iter{_iter} -{} - -simdjson_really_inline simdjson_result object_iterator::operator*() noexcept { - 
error_code error = iter.error(); - if (error) { iter.abandon(); return error; } - auto result = field::start(iter); - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (result.error()) { iter.abandon(); } - return result; -} -simdjson_really_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { - return !(*this != other); -} -simdjson_really_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { - return iter.is_open(); -} - -simdjson_really_inline object_iterator &object_iterator::operator++() noexcept { - // TODO this is a safety rail ... users should exit loops as soon as they receive an error. - // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. - if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error - - simdjson_unused error_code error; - if ((error = iter.skip_child() )) { return *this; } - - simdjson_unused bool has_value; - if ((error = iter.has_next_field().get(has_value) )) { return *this; }; - return *this; -} - -// -// ### Live States -// -// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the object is first found and the iterator is just past the {. -// In this state, at_start == true. -// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the , or } before the next value. In this state, -// depth == iter.depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. 
In this state, depth > iter.depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter.depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the object iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an -// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter.depth == depth, and at_start == false. -// -// Errors that occur while reading a field to give to the user (such as when the key is not a -// string or the field is missing a colon) are yielded immediately. Depth is then decremented, -// moving to the Finished state without transitioning through an Error state at all. -// -// ## Terminal State -// -// The terminal state has iter.depth < depth. at_start is always false. -// -// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. -// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
-// - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value -) noexcept - : implementation_simdjson_result_base(std::forward(value)) -{ - first.iter.assert_is_valid(); -} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base({}, error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::operator*() noexcept { - if (error()) { return error(); } - return *first; -} -// If we're iterating and there is an error, return the error once. -simdjson_really_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return !error(); } - return first == other.first; -} -// If we're iterating and there is an error, return the error once. -simdjson_really_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { - if (!first.iter.is_valid()) { return error(); } - return first != other.first; -} -// Checks for ']' and ',' -simdjson_really_inline simdjson_result &simdjson_result::operator++() noexcept { - // Clear the error if there is one, so we don't yield it twice - if (error()) { second = SUCCESS; return *this; } - ++first; - return *this; -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ -/* begin file include/simdjson/generic/ondemand/array-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -// -// ### Live States -// -// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is -// always SUCCESS: -// -// - Start: This is the state when the array is first found and the iterator is just past the `{`. -// In this state, at_start == true. 
-// - Next: After we hand a scalar value to the user, or an array/object which they then fully -// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, -// depth == iter->depth, at_start == false, and error == SUCCESS. -// - Unfinished Business: When we hand an array/object to the user which they do not fully -// iterate over, we need to finish that iteration by skipping child values until we reach the -// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. -// -// ## Error States -// -// In error states, we will yield exactly one more value before stopping. iter->depth == depth -// and at_start is always false. We decrement after yielding the error, moving to the Finished -// state. -// -// - Chained Error: When the array iterator is part of an error chain--for example, in -// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an -// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and -// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. -// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, -// we flag that as an error and treat it exactly the same as a Chained Error. In this state, -// error == TAPE_ERROR, iter->depth == depth, and at_start == false. -// -// ## Terminal State -// -// The terminal state has iter->depth < depth. at_start is always false. -// -// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this -// by decrementing depth. In this state, iter->depth < depth, at_start == false, and -// error == SUCCESS. 
-// - -simdjson_really_inline array::array(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} - -simdjson_really_inline simdjson_result array::start(value_iterator &iter) noexcept { - // We don't need to know if the array is empty to start iteration, but we do want to know if there - // is an error--thus `simdjson_unused`. - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_array().get(has_value) ); - return array(iter); -} -simdjson_really_inline simdjson_result array::start_root(value_iterator &iter) noexcept { - simdjson_unused bool has_value; - SIMDJSON_TRY( iter.start_root_array().get(has_value) ); - return array(iter); -} -simdjson_really_inline simdjson_result array::started(value_iterator &iter) noexcept { - bool has_value; - SIMDJSON_TRY(iter.started_array().get(has_value)); - return array(iter); -} - -simdjson_really_inline simdjson_result array::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return array_iterator(iter); -} -simdjson_really_inline simdjson_result array::end() noexcept { - return array_iterator(iter); -} -simdjson_really_inline error_code array::consume() noexcept { - auto error = iter.json_iter().skip_child(iter.depth()-1); - if(error) { iter.abandon(); } - return error; -} - -simdjson_really_inline simdjson_result array::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. 
- const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); -} - -SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING -simdjson_really_inline simdjson_result array::count_elements() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the array after counting the number of elements. - iter.reset_array(); - return count; -} - -simdjson_really_inline simdjson_result array::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_array().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; -} - -inline simdjson_result array::reset() & noexcept { - return iter.reset_array(); -} - -inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - // - means "the append position" or "the element after the end of the array" - // We don't support this, because we're returning a real element, not a position. - if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } - - // Read the array index - size_t array_index = 0; - size_t i; - for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { - uint8_t digit = uint8_t(json_pointer[i] - '0'); - // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object - if (digit > 9) { return INCORRECT_TYPE; } - array_index = array_index*10 + digit; - } - - // 0 followed by other digits is invalid - if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" - - // Empty string is invalid; so is a "/" with no digits before it - if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" - // Get the child - auto child = at(array_index); - // If there is an error, it ends here - if(child.error()) { - return child; - } - - // If there is a /, we're not done yet, call recursively. - if (i < json_pointer.length()) { - child = child.at_pointer(json_pointer.substr(i)); - } - return child; -} - -simdjson_really_inline simdjson_result array::at(size_t index) noexcept { - size_t i = 0; - for (auto value : *this) { - if (i == index) { return value; } - i++; - } - return INDEX_OUT_OF_BOUNDS; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value -) noexcept - : implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept - : implementation_simdjson_result_base(error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); -} -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_really_inline simdjson_result simdjson_result::is_empty() & noexcept { - if (error()) { return 
error(); } - return first.is_empty(); -} -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/array-inl.h */ -/* begin file include/simdjson/generic/ondemand/document-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline document::document(ondemand::json_iterator &&_iter) noexcept - : iter{std::forward(_iter)} -{ - logger::log_start_value(iter, "document"); -} - -simdjson_really_inline document document::start(json_iterator &&iter) noexcept { - return document(std::forward(iter)); -} - -inline void document::rewind() noexcept { - iter.rewind(); -} - -inline std::string document::to_debug_string() noexcept { - return iter.to_string(); -} - -inline simdjson_result document::current_location() noexcept { - return iter.current_location(); -} - -inline int32_t document::current_depth() const noexcept { - return iter.depth(); -} - -inline bool document::is_alive() noexcept { - return iter.is_alive(); -} -simdjson_really_inline value_iterator document::resume_value_iterator() noexcept { - return value_iterator(&iter, 1, iter.root_position()); -} -simdjson_really_inline value_iterator document::get_root_value_iterator() noexcept { - return resume_value_iterator(); -} -simdjson_really_inline simdjson_result document::start_or_resume_object() noexcept { - if (iter.at_root()) { - return get_object(); - } else { - return object::resume(resume_value_iterator()); - } -} -simdjson_really_inline simdjson_result document::get_value() noexcept { - // Make sure we start any arrays or objects before returning, so that start_root_() - // gets called. 
- iter.assert_at_document_depth(); - switch (*iter.peek()) { - case '[': - case '{': - return value(get_root_value_iterator()); - default: - // Unfortunately, scalar documents are a special case in simdjson and they cannot - // be safely converted to value instances. - return SCALAR_DOCUMENT_AS_VALUE; - // return value(get_root_value_iterator()); - } -} -simdjson_really_inline simdjson_result document::get_array() & noexcept { - auto value = get_root_value_iterator(); - return array::start_root(value); -} -simdjson_really_inline simdjson_result document::get_object() & noexcept { - auto value = get_root_value_iterator(); - return object::start_root(value); -} -simdjson_really_inline simdjson_result document::get_uint64() noexcept { - return get_root_value_iterator().get_root_uint64(); -} -simdjson_really_inline simdjson_result document::get_uint64_in_string() noexcept { - return get_root_value_iterator().get_root_uint64_in_string(); -} -simdjson_really_inline simdjson_result document::get_int64() noexcept { - return get_root_value_iterator().get_root_int64(); -} -simdjson_really_inline simdjson_result document::get_int64_in_string() noexcept { - return get_root_value_iterator().get_root_int64_in_string(); -} -simdjson_really_inline simdjson_result document::get_double() noexcept { - return get_root_value_iterator().get_root_double(); -} -simdjson_really_inline simdjson_result document::get_double_in_string() noexcept { - return get_root_value_iterator().get_root_double_in_string(); -} -simdjson_really_inline simdjson_result document::get_string() noexcept { - return get_root_value_iterator().get_root_string(); -} -simdjson_really_inline simdjson_result document::get_raw_json_string() noexcept { - return get_root_value_iterator().get_root_raw_json_string(); -} -simdjson_really_inline simdjson_result document::get_bool() noexcept { - return get_root_value_iterator().get_root_bool(); -} -simdjson_really_inline bool document::is_null() noexcept { - return 
get_root_value_iterator().is_root_null(); -} - -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_bool(); } -template<> simdjson_really_inline simdjson_result document::get() & noexcept { return get_value(); } - -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } -template<> simdjson_really_inline simdjson_result document::get() && noexcept { return get_value(); } - -template simdjson_really_inline error_code document::get(T &out) & noexcept { - return get().get(out); -} -template simdjson_really_inline error_code document::get(T &out) && noexcept { - return 
std::forward(*this).get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline document::operator array() & noexcept(false) { return get_array(); } -simdjson_really_inline document::operator object() & noexcept(false) { return get_object(); } -simdjson_really_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } -simdjson_really_inline document::operator int64_t() noexcept(false) { return get_int64(); } -simdjson_really_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_really_inline document::operator std::string_view() noexcept(false) { return get_string(); } -simdjson_really_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } -simdjson_really_inline document::operator bool() noexcept(false) { return get_bool(); } -simdjson_really_inline document::operator value() noexcept(false) { return get_value(); } - -#endif -simdjson_really_inline simdjson_result document::count_elements() & noexcept { - auto a = get_array(); - simdjson_result answer = a.count_elements(); - /* If there was an array, we are now left pointing at its first element. */ - if(answer.error() == SUCCESS) { - iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } - return answer; -} -simdjson_really_inline simdjson_result document::count_fields() & noexcept { - auto a = get_object(); - simdjson_result answer = a.count_fields(); - /* If there was an array, we are now left pointing at its first element. 
*/ - if(answer.error() == SUCCESS) { - iter._depth = 1 ; /* undoing the increment so we go back at the doc depth.*/ - iter.assert_at_document_depth(); - } - return answer; -} -simdjson_really_inline simdjson_result document::at(size_t index) & noexcept { - auto a = get_array(); - return a.at(index); -} -simdjson_really_inline simdjson_result document::begin() & noexcept { - return get_array().begin(); -} -simdjson_really_inline simdjson_result document::end() & noexcept { - return {}; -} - -simdjson_really_inline simdjson_result document::find_field(std::string_view key) & noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_really_inline simdjson_result document::find_field(const char *key) & noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_really_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_really_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_really_inline simdjson_result document::operator[](std::string_view key) & noexcept { - return start_or_resume_object()[key]; -} -simdjson_really_inline simdjson_result document::operator[](const char *key) & noexcept { - return start_or_resume_object()[key]; -} - -simdjson_really_inline error_code document::consume() noexcept { - auto error = iter.skip_child(0); - if(error) { iter.abandon(); } - return error; -} - -simdjson_really_inline simdjson_result document::raw_json() noexcept { - auto _iter = get_root_value_iterator(); - const uint8_t * starting_point{_iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - // After 'consume()', we could be left pointing just beyond the document, but that - // is ok because we are not going to dereference the final pointer position, we just - // use it to compute the length in bytes. 
- const uint8_t * final_point{iter.unsafe_pointer()}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); -} - -simdjson_really_inline simdjson_result document::type() noexcept { - return get_root_value_iterator().type(); -} - -simdjson_really_inline simdjson_result document::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! ((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_really_inline bool document::is_negative() noexcept { - return get_root_value_iterator().is_root_negative(); -} - -simdjson_really_inline simdjson_result document::is_integer() noexcept { - return get_root_value_iterator().is_root_integer(); -} - -simdjson_really_inline simdjson_result document::get_number_type() noexcept { - return get_root_value_iterator().get_root_number_type(); -} - -simdjson_really_inline simdjson_result document::get_number() noexcept { - return get_root_value_iterator().get_root_number(); -} - - -simdjson_really_inline simdjson_result document::raw_json_token() noexcept { - auto _iter = get_root_value_iterator(); - return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); -} - -simdjson_really_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { - rewind(); // Rewind the document each time at_pointer is called - if (json_pointer.empty()) { - return this->get_value(); - } - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base( - error - ) -{ -} -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_really_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return 
first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} - -template -simdjson_really_inline simdjson_result simdjson_result::get() & noexcept { - if (error()) { return error(); } - return first.get(); -} -template -simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first).get(); -} -template 
-simdjson_really_inline error_code simdjson_result::get(T &out) & noexcept { - if (error()) { return error(); } - return first.get(out); -} -template -simdjson_really_inline error_code simdjson_result::get(T &out) && noexcept { - if (error()) { return error(); } - return std::forward(first).get(out); -} - -template<> simdjson_really_inline simdjson_result simdjson_result::get() & noexcept = delete; -template<> simdjson_really_inline simdjson_result simdjson_result::get() && noexcept { - if (error()) { return error(); } - return std::forward(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { - if (error()) { return error(); } - out = std::forward(first); - return SUCCESS; -} - -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} - -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} - - -simdjson_really_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} - -simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} - -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} - -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} - - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { - if (error()) { throw 
simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - - -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} - -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - 
return first.at_pointer(json_pointer); -} - - -} // namespace simdjson - - -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline document_reference::document_reference() noexcept : doc{nullptr} {} -simdjson_really_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} -simdjson_really_inline void document_reference::rewind() noexcept { doc->rewind(); } -simdjson_really_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } -simdjson_really_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } -simdjson_really_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } -simdjson_really_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } -simdjson_really_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } -simdjson_really_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } -simdjson_really_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } -simdjson_really_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } -simdjson_really_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } -simdjson_really_inline bool document_reference::is_null() noexcept { return doc->is_null(); } - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline document_reference::operator array() & noexcept(false) { return array(*doc); } -simdjson_really_inline document_reference::operator object() & noexcept(false) { return object(*doc); } -simdjson_really_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } -simdjson_really_inline document_reference::operator int64_t() noexcept(false) { return 
int64_t(*doc); } -simdjson_really_inline document_reference::operator double() noexcept(false) { return double(*doc); } -simdjson_really_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } -simdjson_really_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } -simdjson_really_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } -simdjson_really_inline document_reference::operator value() noexcept(false) { return value(*doc); } -#endif -simdjson_really_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } -simdjson_really_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } -simdjson_really_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } -simdjson_really_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } -simdjson_really_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } -simdjson_really_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } -simdjson_really_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } -simdjson_really_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } -simdjson_really_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } -simdjson_really_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } -simdjson_really_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } -simdjson_really_inline simdjson_result 
document_reference::type() noexcept { return doc->type(); } -simdjson_really_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } -simdjson_really_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } -simdjson_really_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } -simdjson_really_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } -simdjson_really_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } -simdjson_really_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_number_type(); } -simdjson_really_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } -simdjson_really_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } -simdjson_really_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } -simdjson_really_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} -simdjson_really_inline document_reference::operator document&() const noexcept { return *doc; } - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - - - -namespace simdjson { -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) - noexcept : implementation_simdjson_result_base(std::forward(value), error) {} - - -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return 
first.count_fields(); -} -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) & noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_really_inline error_code simdjson_result::rewind() noexcept { - if (error()) { return error(); } - first.rewind(); - return SUCCESS; -} -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { - return {}; -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::get_array() & noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() & noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return 
error(); } - return first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline simdjson_result simdjson_result::get_value() noexcept { - if (error()) { return error(); } - return first.get_value(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return error(); } - return first.is_null(); -} -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_really_inline bool simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline 
simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); 
-} - - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/document-inl.h */ -/* begin file include/simdjson/generic/ondemand/value-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline value::value(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} -simdjson_really_inline value value::start(const value_iterator &iter) noexcept { - return iter; -} -simdjson_really_inline value value::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_really_inline simdjson_result value::get_array() noexcept { - return array::start(iter); -} -simdjson_really_inline simdjson_result value::get_object() noexcept { - return object::start(iter); -} -simdjson_really_inline simdjson_result value::start_or_resume_object() noexcept { - if (iter.at_start()) { - return get_object(); - } else { - return object::resume(iter); - } -} - -simdjson_really_inline simdjson_result value::get_raw_json_string() noexcept { - return iter.get_raw_json_string(); -} -simdjson_really_inline simdjson_result value::get_string() noexcept { - return iter.get_string(); -} -simdjson_really_inline simdjson_result value::get_double() noexcept { - return iter.get_double(); -} -simdjson_really_inline simdjson_result value::get_double_in_string() noexcept { - return iter.get_double_in_string(); -} -simdjson_really_inline simdjson_result value::get_uint64() noexcept { - return iter.get_uint64(); -} -simdjson_really_inline simdjson_result value::get_uint64_in_string() noexcept { - return iter.get_uint64_in_string(); -} -simdjson_really_inline simdjson_result value::get_int64() noexcept { - return iter.get_int64(); -} -simdjson_really_inline simdjson_result value::get_int64_in_string() noexcept { - return iter.get_int64_in_string(); -} -simdjson_really_inline simdjson_result value::get_bool() noexcept { - return iter.get_bool(); -} -simdjson_really_inline bool value::is_null() noexcept { - return 
iter.is_null(); -} - -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_array(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_object(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_string(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_number(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_double(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_uint64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_int64(); } -template<> simdjson_really_inline simdjson_result value::get() noexcept { return get_bool(); } - -template simdjson_really_inline error_code value::get(T &out) noexcept { - return get().get(out); -} - -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline value::operator array() noexcept(false) { - return get_array(); -} -simdjson_really_inline value::operator object() noexcept(false) { - return get_object(); -} -simdjson_really_inline value::operator uint64_t() noexcept(false) { - return get_uint64(); -} -simdjson_really_inline value::operator int64_t() noexcept(false) { - return get_int64(); -} -simdjson_really_inline value::operator double() noexcept(false) { - return get_double(); -} -simdjson_really_inline value::operator std::string_view() noexcept(false) { - return get_string(); -} -simdjson_really_inline value::operator raw_json_string() noexcept(false) { - return get_raw_json_string(); -} -simdjson_really_inline value::operator bool() noexcept(false) { - return get_bool(); -} -#endif - -simdjson_really_inline simdjson_result value::begin() & noexcept { - return get_array().begin(); -} -simdjson_really_inline simdjson_result value::end() & noexcept { - return {}; -} 
-simdjson_really_inline simdjson_result value::count_elements() & noexcept { - simdjson_result answer; - auto a = get_array(); - answer = a.count_elements(); - // count_elements leaves you pointing inside the array, at the first element. - // We need to move back so that the user can create a new array (which requires that - // we point at '['). - iter.move_at_start(); - return answer; -} -simdjson_really_inline simdjson_result value::count_fields() & noexcept { - simdjson_result answer; - auto a = get_object(); - answer = a.count_fields(); - iter.move_at_start(); - return answer; -} -simdjson_really_inline simdjson_result value::at(size_t index) noexcept { - auto a = get_array(); - return a.at(index); -} - -simdjson_really_inline simdjson_result value::find_field(std::string_view key) noexcept { - return start_or_resume_object().find_field(key); -} -simdjson_really_inline simdjson_result value::find_field(const char *key) noexcept { - return start_or_resume_object().find_field(key); -} - -simdjson_really_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} -simdjson_really_inline simdjson_result value::find_field_unordered(const char *key) noexcept { - return start_or_resume_object().find_field_unordered(key); -} - -simdjson_really_inline simdjson_result value::operator[](std::string_view key) noexcept { - return start_or_resume_object()[key]; -} -simdjson_really_inline simdjson_result value::operator[](const char *key) noexcept { - return start_or_resume_object()[key]; -} - -simdjson_really_inline simdjson_result value::type() noexcept { - return iter.type(); -} - -simdjson_really_inline simdjson_result value::is_scalar() noexcept { - json_type this_type; - auto error = type().get(this_type); - if(error) { return error; } - return ! 
((this_type == json_type::array) || (this_type == json_type::object)); -} - -simdjson_really_inline bool value::is_negative() noexcept { - return iter.is_negative(); -} - -simdjson_really_inline simdjson_result value::is_integer() noexcept { - return iter.is_integer(); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number_type() noexcept { - return iter.get_number_type(); -} -simdjson_warn_unused simdjson_really_inline simdjson_result value::get_number() noexcept { - return iter.get_number(); -} - -simdjson_really_inline std::string_view value::raw_json_token() noexcept { - return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); -} - -simdjson_really_inline simdjson_result value::current_location() noexcept { - return iter.json_iter().current_location(); -} - -simdjson_really_inline int32_t value::current_depth() const noexcept{ - return iter.json_iter().depth(); -} - -simdjson_really_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { - json_type t; - SIMDJSON_TRY(type().get(t)); - switch (t) - { - case json_type::array: - return (*this).get_array().at_pointer(json_pointer); - case json_type::object: - return (*this).get_object().at_pointer(json_pointer); - default: - return INVALID_JSON_POINTER; - } -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_really_inline simdjson_result simdjson_result::count_elements() & noexcept { - if (error()) { return error(); } - return first.count_elements(); -} -simdjson_really_inline simdjson_result 
simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} -simdjson_really_inline simdjson_result simdjson_result::at(size_t index) noexcept { - if (error()) { return error(); } - return first.at(index); -} -simdjson_really_inline simdjson_result simdjson_result::begin() & noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() & noexcept { - if (error()) { return error(); } - return {}; -} - -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field(key); -} - -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} - -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { - if (error()) { return error(); } - return first[key]; -} - -simdjson_really_inline simdjson_result simdjson_result::get_array() noexcept { - if (error()) { return error(); } - return first.get_array(); -} -simdjson_really_inline simdjson_result simdjson_result::get_object() noexcept { - if (error()) { return error(); } - return first.get_object(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64() noexcept { - if (error()) { return error(); } - return 
first.get_uint64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { - if (error()) { return error(); } - return first.get_uint64_in_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64() noexcept { - if (error()) { return error(); } - return first.get_int64(); -} -simdjson_really_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { - if (error()) { return error(); } - return first.get_int64_in_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double() noexcept { - if (error()) { return error(); } - return first.get_double(); -} -simdjson_really_inline simdjson_result simdjson_result::get_double_in_string() noexcept { - if (error()) { return error(); } - return first.get_double_in_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_string() noexcept { - if (error()) { return error(); } - return first.get_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { - if (error()) { return error(); } - return first.get_raw_json_string(); -} -simdjson_really_inline simdjson_result simdjson_result::get_bool() noexcept { - if (error()) { return error(); } - return first.get_bool(); -} -simdjson_really_inline bool simdjson_result::is_null() noexcept { - if (error()) { return false; } - return first.is_null(); -} - -template simdjson_really_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return first.get(); -} -template simdjson_really_inline error_code simdjson_result::get(T &out) noexcept { - if (error()) { return error(); } - return first.get(out); -} - -template<> simdjson_really_inline simdjson_result simdjson_result::get() noexcept { - if (error()) { return error(); } - return std::move(first); -} -template<> simdjson_really_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { - if (error()) { 
return error(); } - out = first; - return SUCCESS; -} - -simdjson_really_inline simdjson_result simdjson_result::type() noexcept { - if (error()) { return error(); } - return first.type(); -} -simdjson_really_inline simdjson_result simdjson_result::is_scalar() noexcept { - if (error()) { return error(); } - return first.is_scalar(); -} -simdjson_really_inline simdjson_result simdjson_result::is_negative() noexcept { - if (error()) { return error(); } - return first.is_negative(); -} -simdjson_really_inline simdjson_result simdjson_result::is_integer() noexcept { - if (error()) { return error(); } - return first.is_integer(); -} -simdjson_really_inline simdjson_result simdjson_result::get_number_type() noexcept { - if (error()) { return error(); } - return first.get_number_type(); -} -simdjson_really_inline simdjson_result simdjson_result::get_number() noexcept { - if (error()) { return error(); } - return first.get_number(); -} -#if SIMDJSON_EXCEPTIONS -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator uint64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator int64_t() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator double() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator std::string_view() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -simdjson_really_inline simdjson_result::operator bool() noexcept(false) { - if (error()) { throw simdjson_error(error()); } - return first; -} -#endif - -simdjson_really_inline simdjson_result simdjson_result::raw_json_token() noexcept { - if (error()) { return error(); } - return first.raw_json_token(); -} - -simdjson_really_inline simdjson_result simdjson_result::current_location() noexcept { - if (error()) { return error(); } - return first.current_location(); -} - -simdjson_really_inline int32_t simdjson_result::current_depth() const noexcept { - if (error()) { return error(); } - return first.current_depth(); -} - -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/value-inl.h */ -/* begin file include/simdjson/generic/ondemand/field-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit -simdjson_really_inline field::field() noexcept : std::pair() {} - -simdjson_really_inline field::field(raw_json_string key, ondemand::value &&value) noexcept - : std::pair(key, std::forward(value)) -{ -} - -simdjson_really_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { - raw_json_string key; - SIMDJSON_TRY( parent_iter.field_key().get(key) ); - SIMDJSON_TRY( parent_iter.field_value() ); - return field::start(parent_iter, key); -} - -simdjson_really_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { - return field(key, parent_iter.child()); -} - -simdjson_really_inline simdjson_warn_unused simdjson_result 
field::unescaped_key() noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. - simdjson_result answer = first.unescape(second.iter.string_buf_loc()); - first.consume(); - return answer; -} - -simdjson_really_inline raw_json_string field::key() const noexcept { - SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. - return first; -} - -simdjson_really_inline value &field::value() & noexcept { - return second; -} - -simdjson_really_inline value field::value() && noexcept { - return std::forward(*this).second; -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} - -simdjson_really_inline simdjson_result simdjson_result::key() noexcept { - if (error()) { return error(); } - return first.key(); -} -simdjson_really_inline simdjson_result simdjson_result::unescaped_key() noexcept { - if (error()) { return error(); } - return first.unescaped_key(); -} -simdjson_really_inline simdjson_result simdjson_result::value() noexcept { - if (error()) { return error(); } - return std::move(first.value()); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/field-inl.h */ -/* begin file include/simdjson/generic/ondemand/object-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if 
(!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} -simdjson_really_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) & noexcept { - return find_field_unordered(key); -} -simdjson_really_inline simdjson_result object::operator[](const std::string_view key) && noexcept { - return std::forward(*this).find_field_unordered(key); -} -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) & noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} -simdjson_really_inline simdjson_result object::find_field(const std::string_view key) && noexcept { - bool has_value; - SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); - if (!has_value) { return NO_SUCH_FIELD; } - return value(iter.child()); -} - -simdjson_really_inline simdjson_result object::start(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_object().error() ); - return object(iter); -} -simdjson_really_inline simdjson_result object::start_root(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.start_root_object().error() ); - return object(iter); -} -simdjson_really_inline error_code object::consume() noexcept { - if(iter.is_at_key()) { - /** - * whenever you are pointing at a key, calling skip_child() is - * unsafe because you will hit a string and you will assume that - * it is string value, and this mistake will lead you to make bad - * depth computation. - */ - /** - * We want to 'consume' the key. 
We could really - * just do _json_iter->return_current_and_advance(); at this - * point, but, for clarity, we will use the high-level API to - * eat the key. We assume that the compiler optimizes away - * most of the work. - */ - simdjson_unused raw_json_string actual_key; - auto error = iter.field_key().get(actual_key); - if (error) { iter.abandon(); return error; }; - // Let us move to the value while we are at it. - if ((error = iter.field_value())) { iter.abandon(); return error; } - } - auto error_skip = iter.json_iter().skip_child(iter.depth()-1); - if(error_skip) { iter.abandon(); } - return error_skip; -} - -simdjson_really_inline simdjson_result object::raw_json() noexcept { - const uint8_t * starting_point{iter.peek_start()}; - auto error = consume(); - if(error) { return error; } - const uint8_t * final_point{iter._json_iter->peek(0)}; - return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); -} - -simdjson_really_inline simdjson_result object::started(value_iterator &iter) noexcept { - SIMDJSON_TRY( iter.started_object().error() ); - return object(iter); -} - -simdjson_really_inline object object::resume(const value_iterator &iter) noexcept { - return iter; -} - -simdjson_really_inline object::object(const value_iterator &_iter) noexcept - : iter{_iter} -{ -} - -simdjson_really_inline simdjson_result object::begin() noexcept { -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } -#endif - return object_iterator(iter); -} -simdjson_really_inline simdjson_result object::end() noexcept { - return object_iterator(iter); -} - -inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { - if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } - json_pointer = json_pointer.substr(1); - size_t slash = json_pointer.find('/'); - std::string_view key = json_pointer.substr(0, slash); - // Grab the child with the given key - simdjson_result 
child; - - // If there is an escape character in the key, unescape it and then get the child. - size_t escape = key.find('~'); - if (escape != std::string_view::npos) { - // Unescape the key - std::string unescaped(key); - do { - switch (unescaped[escape+1]) { - case '0': - unescaped.replace(escape, 2, "~"); - break; - case '1': - unescaped.replace(escape, 2, "/"); - break; - default: - return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); - } - escape = unescaped.find('~', escape+1); - } while (escape != std::string::npos); - child = find_field(unescaped); // Take note find_field does not unescape keys when matching - } else { - child = find_field(key); - } - if(child.error()) { - return child; // we do not continue if there was an error - } - // If there is a /, we have to recurse and look up more of the path - if (slash != std::string_view::npos) { - child = child.at_pointer(json_pointer.substr(slash)); - } - return child; -} - -simdjson_really_inline simdjson_result object::count_fields() & noexcept { - size_t count{0}; - // Important: we do not consume any of the values. - for(simdjson_unused auto v : *this) { count++; } - // The above loop will always succeed, but we want to report errors. - if(iter.error()) { return iter.error(); } - // We need to move back at the start because we expect users to iterate through - // the object after counting the number of elements. 
- iter.reset_object(); - return count; -} - -simdjson_really_inline simdjson_result object::is_empty() & noexcept { - bool is_not_empty; - auto error = iter.reset_object().get(is_not_empty); - if(error) { return error; } - return !is_not_empty; -} - -simdjson_really_inline simdjson_result object::reset() & noexcept { - return iter.reset_object(); -} - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -simdjson_really_inline simdjson_result simdjson_result::begin() noexcept { - if (error()) { return error(); } - return first.begin(); -} -simdjson_really_inline simdjson_result simdjson_result::end() noexcept { - if (error()) { return error(); } - return first.end(); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field_unordered(key); -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { - if (error()) { return error(); } - return first[key]; -} -simdjson_really_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first)[key]; -} -simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { - if (error()) { return error(); } - return first.find_field(key); -} 
-simdjson_really_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { - if (error()) { return error(); } - return std::forward(first).find_field(key); -} - -simdjson_really_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { - if (error()) { return error(); } - return first.at_pointer(json_pointer); -} - -inline simdjson_result simdjson_result::reset() noexcept { - if (error()) { return error(); } - return first.reset(); -} - -inline simdjson_result simdjson_result::is_empty() noexcept { - if (error()) { return error(); } - return first.is_empty(); -} - -simdjson_really_inline simdjson_result simdjson_result::count_fields() & noexcept { - if (error()) { return error(); } - return first.count_fields(); -} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/object-inl.h */ -/* begin file include/simdjson/generic/ondemand/parser-inl.h */ -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -simdjson_really_inline parser::parser(size_t max_capacity) noexcept - : _max_capacity{max_capacity} { -} - -simdjson_warn_unused simdjson_really_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { - if (new_capacity > max_capacity()) { return CAPACITY; } - if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } - - // string_capacity copied from document::allocate - _capacity = 0; - size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); - string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); -#ifdef SIMDJSON_DEVELOPMENT_CHECKS - start_positions.reset(new (std::nothrow) token_position[new_max_depth]); -#endif - if (implementation) { - SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); - SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); - } else { - SIMDJSON_TRY( 
simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); - } - _capacity = new_capacity; - _max_depth = new_max_depth; - return SUCCESS; -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - // Allocate if needed - if (capacity() < json.length() || !string_buf) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return document::start({ reinterpret_cast(json.data()), this }); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { - return iterate(padded_string_view(json, len, allocated)); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { - return iterate(padded_string_view(json, allocated)); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const std::string &json) & noexcept { - return iterate(padded_string_view(json)); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - padded_string_view json = result.value_unsafe(); - return iterate(json); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { - // We don't presently have 
a way to temporarily get a const T& from a simdjson_result without throwing an exception - SIMDJSON_TRY( result.error() ); - const padded_string &json = result.value_unsafe(); - return iterate(json); -} - -simdjson_warn_unused simdjson_really_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } - - // Allocate if needed - if (capacity() < json.length()) { - SIMDJSON_TRY( allocate(json.length(), max_depth()) ); - } - - // Run stage 1. - SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); - return json_iterator(reinterpret_cast(json.data()), this); -} - -inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { - if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } - return document_stream(*this, buf, len, batch_size); -} -inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size) noexcept { - return iterate_many(reinterpret_cast(buf), len, batch_size); -} -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size) noexcept { - return iterate_many(s.data(), s.length(), batch_size); -} -inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size) noexcept { - return iterate_many(s.data(), s.length(), batch_size); -} - -simdjson_really_inline size_t parser::capacity() const noexcept { - return _capacity; -} -simdjson_really_inline size_t parser::max_capacity() const noexcept { - return _max_capacity; -} -simdjson_really_inline size_t parser::max_depth() const noexcept { - return _max_depth; -} - -simdjson_really_inline void parser::set_max_capacity(size_t max_capacity) noexcept { - size_t MINIMAL_DOCUMENT_CAPACITY = 32; - if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { - _max_capacity = max_capacity; - } else { - _max_capacity = MINIMAL_DOCUMENT_CAPACITY; - } -} - 
-} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept - : implementation_simdjson_result_base(std::forward(value)) {} -simdjson_really_inline simdjson_result::simdjson_result(error_code error) noexcept - : implementation_simdjson_result_base(error) {} - -} // namespace simdjson -/* end file include/simdjson/generic/ondemand/parser-inl.h */ -/* begin file include/simdjson/generic/ondemand/document_stream-inl.h */ -#include -#include -#include -namespace simdjson { -namespace SIMDJSON_BUILTIN_IMPLEMENTATION { -namespace ondemand { - -#ifdef SIMDJSON_THREADS_ENABLED - -inline void stage1_worker::finish() { - // After calling "run" someone would call finish() to wait - // for the end of the processing. - // This function will wait until either the thread has done - // the processing or, else, the destructor has been called. - std::unique_lock lock(locking_mutex); - cond_var.wait(lock, [this]{return has_work == false;}); -} - -inline stage1_worker::~stage1_worker() { - // The thread may never outlive the stage1_worker instance - // and will always be stopped/joined before the stage1_worker - // instance is gone. - stop_thread(); -} - -inline void stage1_worker::start_thread() { - std::unique_lock lock(locking_mutex); - if(thread.joinable()) { - return; // This should never happen but we never want to create more than one thread. - } - thread = std::thread([this]{ - while(true) { - std::unique_lock thread_lock(locking_mutex); - // We wait for either "run" or "stop_thread" to be called. - cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); - // If, for some reason, the stop_thread() method was called (i.e., the - // destructor of stage1_worker is called, then we want to immediately destroy - // the thread (and not do any more processing). 
- if(!can_work) { - break; - } - this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, - this->_next_batch_start); - this->has_work = false; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify "finish" - thread_lock.unlock(); - } - } - ); -} - - -inline void stage1_worker::stop_thread() { - std::unique_lock lock(locking_mutex); - // We have to make sure that all locks can be released. - can_work = false; - has_work = false; - cond_var.notify_all(); - lock.unlock(); - if(thread.joinable()) { - thread.join(); - } -} - -inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { - std::unique_lock lock(locking_mutex); - owner = ds; - _next_batch_start = next_batch_start; - stage1_thread_parser = stage1; - has_work = true; - // The condition variable call should be moved after thread_lock.unlock() for performance - // reasons but thread sanitizers may report it as a data race if we do. - // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock - cond_var.notify_one(); // will notify the thread lock that we have work - lock.unlock(); -} - -#endif // SIMDJSON_THREADS_ENABLED - -simdjson_really_inline document_stream::document_stream( - ondemand::parser &_parser, - const uint8_t *_buf, - size_t _len, - size_t _batch_size -) noexcept - : parser{&_parser}, - buf{_buf}, - len{_len}, - batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, - error{SUCCESS} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change - #endif -{ -#ifdef SIMDJSON_THREADS_ENABLED - if(worker.get() == nullptr) { - error = MEMALLOC; - } -#endif -} - -simdjson_really_inline document_stream::document_stream() noexcept - : parser{nullptr}, - buf{nullptr}, - len{0}, - batch_size{0}, - error{UNINITIALIZED} - #ifdef SIMDJSON_THREADS_ENABLED - , use_thread(false) - #endif -{ -} - -simdjson_really_inline document_stream::~document_stream() noexcept -{ - #ifdef SIMDJSON_THREADS_ENABLED - worker.reset(); - #endif -} - -inline size_t document_stream::size_in_bytes() const noexcept { - return len; -} - -inline size_t document_stream::truncated_bytes() const noexcept { - if(error == CAPACITY) { return len - batch_start; } - return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; -} - -simdjson_really_inline document_stream::iterator::iterator() noexcept - : stream{nullptr}, finished{true} { -} - -simdjson_really_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept - : stream{_stream}, finished{is_end} { -} - -simdjson_really_inline simdjson_result document_stream::iterator::operator*() noexcept { - //if(stream->error) { return stream->error; } - return simdjson_result(stream->doc, stream->error); -} - -simdjson_really_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { - // If there is an error, then we want the iterator - // to be finished, no matter what. (E.g., we do not - // keep generating documents with errors, or go beyond - // a document with errors.) - // - // Users do not have to call "operator*()" when they use operator++, - // so we need to end the stream in the operator++ function. 
- // - // Note that setting finished = true is essential otherwise - // we would enter an infinite loop. - if (stream->error) { finished = true; } - // Note that stream->error() is guarded against error conditions - // (it will immediately return if stream->error casts to false). - // In effect, this next function does nothing when (stream->error) - // is true (hence the risk of an infinite loop). - stream->next(); - // If that was the last document, we're finished. - // It is the only type of error we do not want to appear - // in operator*. - if (stream->error == EMPTY) { finished = true; } - // If we had any other kind of error (not EMPTY) then we want - // to pass it along to the operator* and we cannot mark the result - // as "finished" just yet. - return *this; -} - -simdjson_really_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { - return finished != other.finished; -} - -simdjson_really_inline document_stream::iterator document_stream::begin() noexcept { - start(); - // If there are no documents, we're finished. 
- return iterator(this, error == EMPTY); -} - -simdjson_really_inline document_stream::iterator document_stream::end() noexcept { - return iterator(this, true); -} - -inline void document_stream::start() noexcept { - if (error) { return; } - error = parser->allocate(batch_size); - if (error) { return; } - // Always run the first stage 1 parse immediately - batch_start = 0; - error = run_stage1(*parser, batch_start); - while(error == EMPTY) { - // In exceptional cases, we may start with an empty block - batch_start = next_batch_start(); - if (batch_start >= len) { return; } - error = run_stage1(*parser, batch_start); - } - if (error) { return; } - doc_index = batch_start; - doc = document(json_iterator(&buf[batch_start], parser)); - doc.iter._streaming = true; - - #ifdef SIMDJSON_THREADS_ENABLED - if (use_thread && next_batch_start() < len) { - // Kick off the first thread on next batch if needed - error = stage1_thread_parser.allocate(batch_size); - if (error) { return; } - worker->start_thread(); - start_stage1_thread(); - if (error) { return; } - } - #endif // SIMDJSON_THREADS_ENABLED -} - -inline void document_stream::next() noexcept { - // We always enter at once once in an error condition. - if (error) { return; } - next_document(); - if (error) { return; } - auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); - doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; - - // Check if at end of structural indexes (i.e. 
at end of batch) - if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { - error = EMPTY; - // Load another batch (if available) - while (error == EMPTY) { - batch_start = next_batch_start(); - if (batch_start >= len) { break; } - #ifdef SIMDJSON_THREADS_ENABLED - if(use_thread) { - load_from_stage1_thread(); - } else { - error = run_stage1(*parser, batch_start); - } - #else - error = run_stage1(*parser, batch_start); - #endif - /** - * Whenever we move to another window, we need to update all pointers to make - * it appear as if the input buffer started at the beginning of the window. - * - * Take this input: - * - * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] - * - * Say you process the following window... - * - * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' - * - * When you do so, the json_iterator has a pointer at the beginning of the memory region - * (pointing at the beginning of '{"z"...'. - * - * When you move to the window that starts at... - * - * '[7, 10, 9] [15, 11, 12, 13] ... - * - * then it is not sufficient to just run stage 1. You also need to re-anchor the - * json_iterator so that it believes we are starting at '[7, 10, 9]...'. - * - * Under the DOM front-end, this gets done automatically because the parser owns - * the pointer the data, and when you call stage1 and then stage2 on the same - * parser, then stage2 will run on the pointer acquired by stage1. - * - * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that - * we used. But json_iterator has no callback when stage1 is called on the parser. - * In fact, I think that the parser is unaware of json_iterator. - * - * - * So we need to re-anchor the json_iterator after each call to stage 1 so that - * all of the pointers are in sync. - */ - doc.iter = json_iterator(&buf[batch_start], parser); - doc.iter._streaming = true; - /** - * End of resync. 
- */ - - if (error) { continue; } // If the error was EMPTY, we may want to load another batch. - doc_index = batch_start; - } - } -} - -inline void document_stream::next_document() noexcept { - // Go to next place where depth=0 (document depth) - error = doc.iter.skip_child(0); - if (error) { return; } - // Always set depth=1 at the start of document - doc.iter._depth = 1; - // Resets the string buffer at the beginning, thus invalidating the strings. - doc.iter._string_buf_loc = parser->string_buf.get(); - doc.iter._root = doc.iter.position(); -} - -inline size_t document_stream::next_batch_start() const noexcept { - return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; -} - -inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { - // This code only updates the structural index in the parser, it does not update any json_iterator - // instance. - size_t remaining = len - _batch_start; - if (remaining <= batch_size) { - return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); - } else { - return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); - } -} - -simdjson_really_inline size_t document_stream::iterator::current_index() const noexcept { - return stream->doc_index; -} - -simdjson_really_inline std::string_view document_stream::iterator::source() const noexcept { - auto depth = stream->doc.iter.depth(); - auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); - - // If at root, process the first token to determine if scalar value - if (stream->doc.iter.at_root()) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': // Depth=1 already at start of document - break; - case '}': case ']': - depth--; - break; - default: // Scalar value document - // TODO: 
Remove any trailing whitespaces - // This returns a string spanning from start of value to the beginning of the next document (excluded) - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); - } - cur_struct_index++; - } - - while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { - switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { - case '{': case '[': - depth++; - break; - case '}': case ']': - depth--; - break; - } - if (depth == 0) { break; } - cur_struct_index++; - } - - return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; -} - -inline error_code document_stream::iterator::error() const noexcept { - return stream->error; -} - -#ifdef SIMDJSON_THREADS_ENABLED - -inline void document_stream::load_from_stage1_thread() noexcept { - worker->finish(); - // Swap to the parser that was loaded up in the thread. Make sure the parser has - // enough memory to swap to, as well. - std::swap(stage1_thread_parser,*parser); - error = stage1_thread_error; - if (error) { return; } - - // If there's anything left, start the stage 1 thread! - if (next_batch_start() < len) { - start_stage1_thread(); - } -} - -inline void document_stream::start_stage1_thread() noexcept { - // we call the thread on a lambda that will update - // this->stage1_thread_error - // there is only one thread that may write to this value - // TODO this is NOT exception-safe. 
- this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error - size_t _next_batch_start = this->next_batch_start(); - - worker->run(this, & this->stage1_thread_parser, _next_batch_start); -} - -#endif // SIMDJSON_THREADS_ENABLED - -} // namespace ondemand -} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION -} // namespace simdjson - -namespace simdjson { - -simdjson_really_inline simdjson_result::simdjson_result( - error_code error -) noexcept : - implementation_simdjson_result_base(error) -{ -} -simdjson_really_inline simdjson_result::simdjson_result( - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value -) noexcept : - implementation_simdjson_result_base( - std::forward(value) - ) -{ -} - -} -/* end file include/simdjson/generic/ondemand/document_stream-inl.h */ -/* begin file include/simdjson/generic/ondemand/serialization-inl.h */ - - -namespace simdjson { - -inline std::string_view trim(const std::string_view str) noexcept { - // We can almost surely do better by rolling our own find_first_not_of function. - size_t first = str.find_first_not_of(" \t\n\r"); - // If we have the empty string (just white space), then no trimming is possible, and - // we return the empty string_view. 
- if (std::string_view::npos == first) { return std::string_view(); } - size_t last = str.find_last_not_of(" \t\n\r"); - return str.substr(first, (last - first + 1)); -} - - -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} - -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} - -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept { - /** - * If we somehow receive a value that has already been consumed, - * then the following code could be in trouble. E.g., we create - * an array as needed, but if an array was already created, then - * it could be bad. - */ - using namespace SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type t; - auto error = x.type().get(t); - if(error != SUCCESS) { return error; } - switch (t) - { - case json_type::array: - { - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array array; - error = x.get_array().get(array); - if(error) { return error; } - return to_json_string(array); - } - case json_type::object: - { - SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object object; - error = x.get_object().get(object); - if(error) { return error; } - return to_json_string(object); - } - default: - return trim(x.raw_json_token()); - } -} - -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } - return trim(v); -} - -inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept { - std::string_view v; - auto error = x.raw_json().get(v); - if(error) {return error; } 
- return trim(v); -} - -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} - -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} - -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} - -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} - -inline simdjson_result to_json_string(simdjson_result x) { - if (x.error()) { return x.error(); } - return to_json_string(x.value_unsafe()); -} -} // namespace simdjson - -namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { - std::string_view v; - auto error = simdjson::to_json_string(x).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } -} -#endif - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw 
simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } -} -#endif - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } -} -#endif - -#if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, 
simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - throw simdjson::simdjson_error(error); - } -} -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); -} -#else -inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { - std::string_view v; - auto error = simdjson::to_json_string(value).get(v); - if(error == simdjson::SUCCESS) { - return (out << v); - } else { - return (out << error); - } -} -#endif -}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand -/* end file include/simdjson/generic/ondemand/serialization-inl.h */ -/* end file include/simdjson/generic/ondemand-inl.h */ - - -namespace simdjson { - /** - * Represents the best statically linked simdjson implementation that can be used by the compiling - * program. - * - * Detects what options the program is compiled against, and picks the minimum implementation that - * will work on any computer that can run the program. For example, if you compile with g++ - * -march=westmere, it will pick the westmere implementation. The haswell implementation will - * still be available, and can be selected at runtime, but the builtin implementation (and any - * code that uses it) will use westmere. - */ - namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; - /** - * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand - */ - namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; - /** - * Function which returns a pointer to an implementation matching the "builtin" implementation. - * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling - * program. 
If you compile with g++ -march=haswell, this will return the haswell implementation. - * It is handy to be able to check what builtin was used: builtin_implementation()->name(). - */ - const implementation * builtin_implementation(); -} // namespace simdjson - -#endif // SIMDJSON_BUILTIN_H -/* end file include/simdjson/builtin.h */ - -#endif // SIMDJSON_H -/* end file include/simdjson.h */