Skip to content

Commit 9c779af

Browse files
committed
Sync from mypy and bump version to 0.3.0
Also update smoke test. Includes `librt.internal.{write_bytes,read_bytes}`.
1 parent 48cd4d7 commit 9c779af

File tree

8 files changed

+197
-11
lines changed

8 files changed

+197
-11
lines changed

lib-rt/CPy.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ CPyTagged CPyTagged_Remainder_(CPyTagged left, CPyTagged right);
148148
CPyTagged CPyTagged_BitwiseLongOp_(CPyTagged a, CPyTagged b, char op);
149149
CPyTagged CPyTagged_Rshift_(CPyTagged left, CPyTagged right);
150150
CPyTagged CPyTagged_Lshift_(CPyTagged left, CPyTagged right);
151+
CPyTagged CPyTagged_BitLength(CPyTagged self);
151152

152153
PyObject *CPyTagged_Str(CPyTagged n);
153154
CPyTagged CPyTagged_FromFloat(double f);
@@ -734,6 +735,7 @@ static inline char CPyDict_CheckSize(PyObject *dict, Py_ssize_t size) {
734735
#define BOTHSTRIP 2
735736

736737
char CPyStr_Equal(PyObject *str1, PyObject *str2);
738+
char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length);
737739
PyObject *CPyStr_Build(Py_ssize_t len, ...);
738740
PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index);
739741
PyObject *CPyStr_GetItemUnsafe(PyObject *str, Py_ssize_t index);

lib-rt/int_ops.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
#include <Python.h>
66
#include "CPy.h"
77

8+
#ifdef _MSC_VER
9+
#include <intrin.h>
10+
#endif
11+
812
#ifndef _WIN32
913
// On 64-bit Linux and macOS, ssize_t and long are both 64 bits, and
1014
// PyLong_FromLong is faster than PyLong_FromSsize_t, so use the faster one
@@ -15,6 +19,17 @@
1519
#define CPyLong_FromSsize_t PyLong_FromSsize_t
1620
#endif
1721

22+
#if defined(__GNUC__) || defined(__clang__)
23+
# if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 8)
24+
# define CPY_CLZ(x) __builtin_clzll((unsigned long long)(x))
25+
# define CPY_BITS 64
26+
# else
27+
# define CPY_CLZ(x) __builtin_clz((unsigned int)(x))
28+
# define CPY_BITS 32
29+
# endif
30+
#endif
31+
32+
1833
CPyTagged CPyTagged_FromSsize_t(Py_ssize_t value) {
1934
// We use a Python object if the value shifted left by 1 is too
2035
// large for Py_ssize_t
@@ -581,3 +596,52 @@ double CPyTagged_TrueDivide(CPyTagged x, CPyTagged y) {
581596
}
582597
return 1.0;
583598
}
599+
600+
// int.bit_length()
601+
CPyTagged CPyTagged_BitLength(CPyTagged self) {
602+
// Handle zero
603+
if (self == 0) {
604+
return 0;
605+
}
606+
607+
// Fast path for small (tagged) ints
608+
if (CPyTagged_CheckShort(self)) {
609+
Py_ssize_t val = CPyTagged_ShortAsSsize_t(self);
610+
Py_ssize_t absval = val < 0 ? -val : val;
611+
int bits = 0;
612+
if (absval) {
613+
#if defined(_MSC_VER)
614+
#if defined(_WIN64)
615+
unsigned long idx;
616+
if (_BitScanReverse64(&idx, (unsigned __int64)absval)) {
617+
bits = (int)(idx + 1);
618+
}
619+
#else
620+
unsigned long idx;
621+
if (_BitScanReverse(&idx, (unsigned long)absval)) {
622+
bits = (int)(idx + 1);
623+
}
624+
#endif
625+
#elif defined(__GNUC__) || defined(__clang__)
626+
bits = (int)(CPY_BITS - CPY_CLZ(absval));
627+
#else
628+
// Fallback to loop if no builtin
629+
while (absval) {
630+
absval >>= 1;
631+
bits++;
632+
}
633+
#endif
634+
}
635+
return bits << 1;
636+
}
637+
638+
// Slow path for big ints
639+
PyObject *pyint = CPyTagged_AsObject(self);
640+
int bits = _PyLong_NumBits(pyint);
641+
Py_DECREF(pyint);
642+
if (bits < 0) {
643+
// _PyLong_NumBits sets an error on failure
644+
return CPY_INT_TAG;
645+
}
646+
return bits << 1;
647+
}

lib-rt/librt_internal.c

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,100 @@ write_str(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames
346346
return Py_None;
347347
}
348348

349+
/*
350+
bytes format: size followed by bytes
351+
short bytes (len <= 127): single byte for size as `(uint8_t)size << 1`
352+
long bytes: \x01 followed by size as Py_ssize_t
353+
*/
354+
355+
// Read one length-prefixed bytes value from the buffer object `data` at its
// current position and advance the position past it.
//
// Returns a new bytes object, or NULL with an exception set on failure.
static PyObject*
read_bytes_internal(PyObject *data) {
    _CHECK_BUFFER(data, NULL)

    // Read length.
    Py_ssize_t size;
    _CHECK_READ(data, 1, NULL)
    uint8_t first = _READ(data, uint8_t)
    if (likely(first != LONG_STR_TAG)) {
        // Common case: short bytes (len <= 127).
        size = (Py_ssize_t)(first >> 1);
    } else {
        // The long form stores the length as a raw Py_ssize_t, so check
        // for exactly that many bytes before reading it.
        _CHECK_READ(data, sizeof(Py_ssize_t), NULL)
        size = _READ(data, Py_ssize_t)
        if (unlikely(size < 0)) {
            // A negative length can only come from corrupt input; reject it
            // here rather than passing it on as a read size.
            PyErr_SetString(PyExc_ValueError, "invalid bytes length in buffer");
            return NULL;
        }
    }
    // Read bytes content.
    char *buf = ((BufferObject *)data)->buf;
    _CHECK_READ(data, size, NULL)
    PyObject *res = PyBytes_FromStringAndSize(
        buf + ((BufferObject *)data)->pos, size
    );
    if (unlikely(res == NULL)) {
        return NULL;
    }
    // Only consume the payload once the result object exists.
    ((BufferObject *)data)->pos += size;
    return res;
}
381+
382+
// Python-level entry point for read_bytes(data): parse the single "data"
// argument and delegate to read_bytes_internal. Returns NULL if argument
// parsing fails.
static PyObject*
read_bytes(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) {
    static const char * const kwlist[] = {"data", NULL};
    static CPyArg_Parser parser = {"O:read_bytes", kwlist, 0};
    PyObject *data;
    return unlikely(!CPyArg_ParseStackAndKeywordsOneArg(args, nargs, kwnames, &parser, &data))
        ? NULL
        : read_bytes_internal(data);
}
392+
393+
static char
394+
write_bytes_internal(PyObject *data, PyObject *value) {
395+
_CHECK_BUFFER(data, CPY_NONE_ERROR)
396+
397+
const char *chunk = PyBytes_AsString(value);
398+
if (unlikely(chunk == NULL))
399+
return CPY_NONE_ERROR;
400+
Py_ssize_t size = PyBytes_GET_SIZE(value);
401+
402+
Py_ssize_t need;
403+
// Write length.
404+
if (likely(size <= MAX_SHORT_LEN)) {
405+
// Common case: short bytes (len <= 127) store as single byte.
406+
need = size + 1;
407+
_CHECK_SIZE(data, need)
408+
_WRITE(data, uint8_t, (uint8_t)size << 1)
409+
} else {
410+
need = size + sizeof(Py_ssize_t) + 1;
411+
_CHECK_SIZE(data, need)
412+
_WRITE(data, uint8_t, LONG_STR_TAG)
413+
_WRITE(data, Py_ssize_t, size)
414+
}
415+
// Write bytes content.
416+
char *buf = ((BufferObject *)data)->buf;
417+
memcpy(buf + ((BufferObject *)data)->pos, chunk, size);
418+
((BufferObject *)data)->pos += size;
419+
((BufferObject *)data)->end += need;
420+
return CPY_NONE;
421+
}
422+
423+
// Python-level entry point for write_bytes(data, value): parse the two
// arguments, require `value` to be a bytes object (TypeError otherwise),
// and delegate to write_bytes_internal. Returns None on success, NULL on
// failure.
static PyObject*
write_bytes(PyObject *self, PyObject *const *args, size_t nargs, PyObject *kwnames) {
    static const char * const kwlist[] = {"data", "value", NULL};
    static CPyArg_Parser parser = {"OO:write_bytes", kwlist, 0};
    PyObject *data;
    PyObject *value;
    if (unlikely(!CPyArg_ParseStackAndKeywordsSimple(args, nargs, kwnames, &parser, &data, &value))) {
        return NULL;
    }
    if (likely(PyBytes_Check(value))) {
        if (likely(write_bytes_internal(data, value) != CPY_NONE_ERROR)) {
            Py_RETURN_NONE;
        }
        return NULL;
    }
    PyErr_SetString(PyExc_TypeError, "value must be a bytes object");
    return NULL;
}
442+
349443
/*
350444
float format:
351445
stored as a C double
@@ -565,6 +659,8 @@ static PyMethodDef librt_internal_module_methods[] = {
565659
{"read_bool", (PyCFunction)read_bool, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a bool")},
566660
{"write_str", (PyCFunction)write_str, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write a string")},
567661
{"read_str", (PyCFunction)read_str, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a string")},
662+
{"write_bytes", (PyCFunction)write_bytes, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write bytes")},
663+
{"read_bytes", (PyCFunction)read_bytes, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read bytes")},
568664
{"write_float", (PyCFunction)write_float, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write a float")},
569665
{"read_float", (PyCFunction)read_float, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("read a float")},
570666
{"write_int", (PyCFunction)write_int, METH_FASTCALL | METH_KEYWORDS, PyDoc_STR("write an int")},
@@ -590,7 +686,7 @@ librt_internal_module_exec(PyObject *m)
590686
}
591687

592688
// Export mypy internal C API, be careful with the order!
593-
static void *NativeInternal_API[14] = {
689+
static void *NativeInternal_API[16] = {
594690
(void *)Buffer_internal,
595691
(void *)Buffer_internal_empty,
596692
(void *)Buffer_getvalue_internal,
@@ -605,6 +701,8 @@ librt_internal_module_exec(PyObject *m)
605701
(void *)write_tag_internal,
606702
(void *)read_tag_internal,
607703
(void *)NativeInternal_ABI_Version,
704+
(void *)write_bytes_internal,
705+
(void *)read_bytes_internal,
608706
};
609707
PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL);
610708
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {

lib-rt/librt_internal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ static CPyTagged read_int_internal(PyObject *data);
1919
static char write_tag_internal(PyObject *data, uint8_t value);
2020
static uint8_t read_tag_internal(PyObject *data);
2121
static int NativeInternal_ABI_Version(void);
22+
static char write_bytes_internal(PyObject *data, PyObject *value);
23+
static PyObject *read_bytes_internal(PyObject *data);
2224

2325
#else
2426

@@ -38,6 +40,8 @@ static void **NativeInternal_API;
3840
#define write_tag_internal (*(char (*)(PyObject *source, uint8_t value)) NativeInternal_API[11])
3941
#define read_tag_internal (*(uint8_t (*)(PyObject *source)) NativeInternal_API[12])
4042
#define NativeInternal_ABI_Version (*(int (*)(void)) NativeInternal_API[13])
43+
#define write_bytes_internal (*(char (*)(PyObject *source, PyObject *value)) NativeInternal_API[14])
44+
#define read_bytes_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[15])
4145

4246
static int
4347
import_librt_internal(void)

lib-rt/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Build script for mypyc C runtime library unit tests.
1+
"""Build script for mypyc C runtime library and C API unit tests.
22
33
The tests are written in C++ and use the Google Test framework.
44
"""

lib-rt/str_ops.c

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,33 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
6464
#undef BLOOM_UPDATE
6565
}
6666

67-
// Adapted from CPython 3.13.1 (_PyUnicode_Equal)
68-
char CPyStr_Equal(PyObject *str1, PyObject *str2) {
69-
if (str1 == str2) {
70-
return 1;
71-
}
72-
Py_ssize_t len = PyUnicode_GET_LENGTH(str1);
73-
if (PyUnicode_GET_LENGTH(str2) != len)
67+
// Content comparison shared by CPyStr_Equal and CPyStr_EqualLiteral; it
// exists only to avoid duplicating code between them. The caller supplies
// the length of str2 and handles the identity (same object) case itself.
// Returns 1 if the strings have equal length, kind and data, else 0.
static inline char _CPyStr_Equal_NoIdentCheck(PyObject *str1, PyObject *str2, Py_ssize_t str2_length) {
    const Py_ssize_t length = PyUnicode_GET_LENGTH(str1);
    const int kind = PyUnicode_KIND(str1);
    if (length != str2_length || kind != PyUnicode_KIND(str2)) {
        return 0;
    }
    // Same length and kind: compare the raw character data byte for byte.
    return memcmp(PyUnicode_DATA(str1), PyUnicode_DATA(str2), length * kind) == 0;
}
79+
80+
// Adapted from CPython 3.13.1 (_PyUnicode_Equal)
81+
char CPyStr_Equal(PyObject *str1, PyObject *str2) {
82+
if (str1 == str2) {
83+
return 1;
84+
}
85+
Py_ssize_t str2_length = PyUnicode_GET_LENGTH(str2);
86+
return _CPyStr_Equal_NoIdentCheck(str1, str2, str2_length);
87+
}
88+
89+
// Compare `str` against `literal_str`, whose length the caller passes in as
// `literal_length` instead of having it read from the object.
// Returns 1 if equal, 0 otherwise.
char CPyStr_EqualLiteral(PyObject *str, PyObject *literal_str, Py_ssize_t literal_length) {
    if (str != literal_str) {
        return _CPyStr_Equal_NoIdentCheck(str, literal_str, literal_length);
    }
    return 1;
}
8295

8396
PyObject *CPyStr_GetItem(PyObject *str, CPyTagged index) {

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ authors = [
1919
{name = "Jukka Lehtosalo", email = "jukka.lehtosalo@iki.fi"},
2020
{name = "Ivan Levkivskyi", email = "levkivskyi@gmail.com"},
2121
]
22-
version = "0.2.3"
22+
version = "0.3.0"
2323
license = {text = "MIT"}
2424
classifiers = [
2525
"Development Status :: 3 - Alpha",

smoke_tests.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
write_float, read_float,
99
write_int, read_int,
1010
write_tag, read_tag,
11+
write_bytes, read_bytes,
1112
)
1213

1314
Tag = u8
@@ -43,6 +44,8 @@ def test_buffer_roundtrip() -> None:
4344
write_int(b, 2 ** 85)
4445
write_int(b, 1234512344)
4546
write_int(b, 1234512345)
47+
write_bytes(b, b"foobar")
48+
write_bytes(b, b"abc" * 1000)
4649

4750
b = Buffer(b.getvalue())
4851
assert read_str(b) == "foo"
@@ -58,6 +61,8 @@ def test_buffer_roundtrip() -> None:
5861
assert read_int(b) == 2 ** 85
5962
assert read_int(b) == 1234512344
6063
assert read_int(b) == 1234512345
64+
assert read_bytes(b) == b"foobar"
65+
assert read_bytes(b) == b"abc" * 1000
6166

6267

6368
def test_buffer_int_size() -> None:

0 commit comments

Comments
 (0)