Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit dfaa715

Browse files
Adds StdStringView Numba type to hstr_ext (#994)
* Adds StdStringView Numba type to hstr_ext. Motivation: an optimization that avoids a copy when creating NRT-manageable unicode instances while working with string data stored in native extensions. * Moving stringlib to native * Fixing str_view_to_float impl and tests
1 parent 5ff3b16 commit dfaa715

18 files changed

+1597
-881
lines changed

sdc/_str_ext.cpp

Lines changed: 74 additions & 854 deletions
Large diffs are not rendered by default.

sdc/extensions/sdc_hashmap_ext.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,26 +66,29 @@ def gen_func_suffixes():
6666
product(key_suffixes, val_suffixes))
6767

6868

69-
def load_native_func(fname, module, skip_check=None):
70-
for suffix in gen_func_suffixes():
71-
if skip_check and skip_check(suffix):
69+
def load_native_func(fname, module, suffixes=None, skip_check=None):
    """Register native symbols exported by ``module`` with llvmlite.

    For every suffix the symbol ``<fname>_<suffix>`` (or just ``<fname>`` when
    the suffix is empty) is looked up as an attribute of ``module`` and passed
    to ``ll.add_symbol`` under the same name.

    Parameters
    ----------
    fname : base name of the native function
    module : object exposing the native symbols as attributes
    suffixes : iterable of name suffixes; a falsy value means a single
        un-suffixed symbol
    skip_check : optional predicate; suffixes for which it returns a truthy
        value are not registered
    """
    if not suffixes:
        suffixes = ['', ]

    for suffix in suffixes:
        wanted = not (skip_check and skip_check(suffix))
        if wanted:
            symbol_name = f'{fname}_{suffix}' if suffix else f'{fname}'
            ll.add_symbol(symbol_name,
                          getattr(module, symbol_name))
7678

7779

78-
load_native_func('hashmap_create', hconc_dict)
79-
load_native_func('hashmap_size', hconc_dict)
80-
load_native_func('hashmap_set', hconc_dict)
81-
load_native_func('hashmap_contains', hconc_dict)
82-
load_native_func('hashmap_lookup', hconc_dict)
83-
load_native_func('hashmap_clear', hconc_dict)
84-
load_native_func('hashmap_pop', hconc_dict)
85-
load_native_func('hashmap_update', hconc_dict)
86-
load_native_func('hashmap_create_from_data', hconc_dict, lambda x: 'voidptr' in x)
87-
load_native_func('hashmap_getiter', hconc_dict)
88-
load_native_func('hashmap_iternext', hconc_dict)
80+
# Suffixes are materialized once, since the same list is reused by every
# registration below (and by the lazy hashmap_dump load further down).
hashmap_func_suffixes = list(gen_func_suffixes())

# (native function base name, optional skip predicate), in registration order.
_hashmap_native_funcs = (
    ('hashmap_create', None),
    ('hashmap_size', None),
    ('hashmap_set', None),
    ('hashmap_contains', None),
    ('hashmap_lookup', None),
    ('hashmap_clear', None),
    ('hashmap_pop', None),
    ('hashmap_update', None),
    # suffixes mentioning 'voidptr' are skipped for this function
    ('hashmap_create_from_data', lambda x: 'voidptr' in x),
    ('hashmap_getiter', None),
    ('hashmap_iternext', None),
)
for _hashmap_fname, _hashmap_skip in _hashmap_native_funcs:
    load_native_func(_hashmap_fname, hconc_dict, hashmap_func_suffixes, _hashmap_skip)
del _hashmap_fname, _hashmap_skip, _hashmap_native_funcs
8992

9093

9194
supported_numeric_key_types = [
@@ -894,7 +897,7 @@ def _hashmap_dump(typingctx, dict_type):
894897

895898
# load hashmap_dump here as otherwise module import will fail
896899
# since it's included in debug build only
897-
load_native_func('hashmap_dump', hconc_dict)
900+
load_native_func('hashmap_dump', hconc_dict, hashmap_func_suffixes)
898901
ty_key, ty_val = dict_type.key_type, dict_type.value_type
899902
key_type_postfix, value_type_postfix = _get_types_postfixes(ty_key, ty_val)
900903

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,313 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2019-2021, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import ctypes as ct
28+
29+
import numba
30+
from numba.core import cgutils, types
31+
from numba.extending import (
32+
intrinsic,
33+
overload,
34+
overload_method,
35+
type_callable,
36+
lower_builtin,
37+
infer_getattr,
38+
)
39+
from numba.core.typing.templates import signature, bound_function, AttributeTemplate
40+
from llvmlite import ir as lir
41+
import llvmlite.binding as ll
42+
43+
from sdc.extensions.sdc_string_view_type import StdStringViewType
44+
from sdc import str_arr_ext
45+
from sdc import hstr_ext
46+
from sdc.str_arr_ext import decode_utf8
47+
from sdc.extensions.sdc_hashmap_ext import load_native_func
48+
49+
# Register the un-suffixed native string-view entry points from hstr_ext so
# the intrinsics below can reference them by name.
for _string_view_fname in (
    'string_view_create',
    'string_view_create_with_data',
    'string_view_len',
    'string_view_get_data_ptr',
    'string_view_set_data',
    'string_view_to_int',
    'string_view_to_float64',
):
    load_native_func(_string_view_fname, hstr_ext)
del _string_view_fname
56+
57+
58+
@intrinsic
def string_view_create(typingctx):
    """Intrinsic building a StdStringViewType value via native ``string_view_create``.

    The native function receives a pointer to the struct's ``meminfo`` field
    (to fill in) and the NRT API table; ``data_ptr`` is then taken from the
    resulting meminfo.
    """
    ret_type = StdStringViewType()

    def codegen(context, builder, sig, args):
        nrt_api = context.nrt.get_nrt_api(builder)
        view_struct = cgutils.create_struct_proxy(ret_type)(context, builder)

        meminfo_ptr_type = view_struct.meminfo.type.as_pointer()  # meminfo to fill
        nrt_api_type = lir.IntType(8).as_pointer()                # NRT API func table
        native_sig = lir.FunctionType(lir.VoidType(), [meminfo_ptr_type, nrt_api_type])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_create")

        meminfo_slot = view_struct._get_ptr_by_name('meminfo')
        builder.call(native_fn, [meminfo_slot, nrt_api])

        # data_ptr is derived from the meminfo the native call has just stored
        view_struct.data_ptr = context.nrt.meminfo_data(builder, view_struct.meminfo)
        return view_struct._getvalue()

    return ret_type(), codegen
78+
79+
80+
@intrinsic
def string_view_create_with_data(typingctx, data, size):
    """Intrinsic building a StdStringViewType over an existing character buffer.

    Calls native ``string_view_create_with_data`` with a pointer to the
    struct's ``meminfo`` field, the NRT API table, the char pointer ``data``
    and ``size`` (size of the pointed-to data in bytes).
    """
    ret_type = StdStringViewType()

    def codegen(context, builder, sig, args):
        buf_ptr, buf_size = args

        nrt_api = context.nrt.get_nrt_api(builder)
        view_struct = cgutils.create_struct_proxy(ret_type)(context, builder)

        native_arg_types = [
            view_struct.meminfo.type.as_pointer(),  # meminfo to fill
            lir.IntType(8).as_pointer(),            # NRT API func table
            lir.IntType(8).as_pointer(),            # char ptr to store in string view
            lir.IntType(64),                        # size of data to point to in bytes
        ]
        native_sig = lir.FunctionType(lir.VoidType(), native_arg_types)
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_create_with_data")

        meminfo_slot = view_struct._get_ptr_by_name('meminfo')
        builder.call(native_fn, [meminfo_slot, nrt_api, buf_ptr, buf_size])

        # data_ptr is derived from the meminfo the native call has just stored
        view_struct.data_ptr = context.nrt.meminfo_data(builder, view_struct.meminfo)
        return view_struct._getvalue()

    return ret_type(data, size), codegen
106+
107+
108+
@intrinsic
def string_view_len(typingctx, str_view):
    """Intrinsic returning the int64 result of native ``string_view_len``.

    The native function is called with the string view's ``data_ptr``.
    """
    ret_type = types.int64

    def codegen(context, builder, sig, args):
        view_type, = sig.args
        view_val, = args
        view_struct = cgutils.create_struct_proxy(view_type)(
            context, builder, value=view_val)

        i8_ptr = lir.IntType(8).as_pointer()
        native_sig = lir.FunctionType(lir.IntType(64), [i8_ptr])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_len")
        return builder.call(native_fn, [view_struct.data_ptr])

    return ret_type(str_view), codegen
121+
122+
123+
@overload(len)
def len_string_view_ovld(str_view):
    """Overload of ``len()`` for StdStringViewType, delegating to string_view_len."""
    if isinstance(str_view, StdStringViewType):
        def len_string_view_impl(str_view):
            return string_view_len(str_view)

        return len_string_view_impl

    # not a string view: this overload does not apply
    return None
131+
132+
133+
@intrinsic
def string_view_get_data_ptr(typingctx, str_view):
    """Intrinsic returning, as voidptr, the result of native ``string_view_get_data_ptr``.

    The native function is called with the string view's ``data_ptr``.
    """
    ret_type = types.voidptr

    def codegen(context, builder, sig, args):
        view_type, = sig.args
        view_val, = args
        view_struct = cgutils.create_struct_proxy(view_type)(
            context, builder, value=view_val)

        i8_ptr = lir.IntType(8).as_pointer()
        native_sig = lir.FunctionType(i8_ptr, [i8_ptr])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_get_data_ptr")
        return builder.call(native_fn, [view_struct.data_ptr])

    return ret_type(str_view), codegen
146+
147+
148+
@intrinsic
def string_view_print(typingctx, str_view):
    """Intrinsic calling native ``string_view_print`` on the view's ``data_ptr``."""

    # string_view_print is registered here, when the intrinsic is typed, rather
    # than at module import.
    # NOTE(review): presumably, like hashmap_dump, the symbol exists in debug
    # builds only, so loading it at import would fail - confirm.
    load_native_func('string_view_print', hstr_ext)

    ret_type = types.void

    def codegen(context, builder, sig, args):
        view_type, = sig.args
        view_val, = args
        view_struct = cgutils.create_struct_proxy(view_type)(
            context, builder, value=view_val)

        native_sig = lir.FunctionType(lir.VoidType(), [lir.IntType(8).as_pointer()])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_print")
        builder.call(native_fn, [view_struct.data_ptr])

    return ret_type(str_view), codegen
166+
167+
168+
@intrinsic
def string_view_set_data(typingctx, str_view, data, size):
    """Intrinsic calling native ``string_view_set_data`` for an existing view.

    The native function receives the view's ``data_ptr``, the new char pointer
    ``data`` and ``size`` (a 64-bit integer), and is declared as returning void.

    The intrinsic is typed as returning ``voidptr``; since the native call
    yields no value, a null pointer of that type is returned so the generated
    IR is well-typed. Callers are expected to ignore the result.
    """
    ret_type = types.voidptr

    def codegen(context, builder, sig, args):
        new_data_val, new_data_size = args[1:]
        str_view_ctinfo = cgutils.create_struct_proxy(sig.args[0])(
            context, builder, value=args[0])
        fnty = lir.FunctionType(lir.VoidType(),
                                [lir.IntType(8).as_pointer(),
                                 lir.IntType(8).as_pointer(),
                                 lir.IntType(64)])
        fn = cgutils.get_or_insert_function(builder.module, fnty, name="string_view_set_data")
        builder.call(fn,
                     [str_view_ctinfo.data_ptr,
                      new_data_val,
                      new_data_size])
        # The call instruction has LLVM type void and must not be used as the
        # voidptr result; hand back a typed null pointer instead.
        return context.get_constant_null(sig.return_type)

    return ret_type(str_view, data, size), codegen
187+
188+
189+
@intrinsic
def string_view_to_int(typingctx, str_view, base):
    """Intrinsic wrapping native ``string_view_to_int``.

    Returns a ``(bool, int64)`` tuple: the native int8 status cast to bool and
    the value the native call wrote through its output pointer.
    """
    ret_type = types.Tuple([types.bool_, types.int64])

    def codegen(context, builder, sig, args):
        view_val, base_val = args
        view_struct = cgutils.create_struct_proxy(sig.args[0])(
            context, builder, value=view_val)

        i64 = lir.IntType(64)
        native_sig = lir.FunctionType(
            lir.IntType(8),
            [lir.IntType(8).as_pointer(), i64, i64.as_pointer()])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_to_int")

        # stack slot the native function writes the parsed value into
        out_slot = cgutils.alloca_once(builder, lir.IntType(64))
        native_status = builder.call(
            native_fn, [view_struct.data_ptr, base_val, out_slot])

        status_flag = context.cast(builder, native_status, types.int8, types.bool_)
        parsed = builder.load(out_slot)
        return context.make_tuple(builder, ret_type, [status_flag, parsed])

    return ret_type(str_view, base), codegen
211+
212+
213+
@overload(int)
def string_view_to_int_ovld(x, base=10):
    """Overload of ``int()`` for StdStringViewType.

    The implementation raises ValueError when string_view_to_int reports a
    truthy status, otherwise returns the converted value.
    """
    if isinstance(x, StdStringViewType):
        def string_view_to_int_impl(x, base=10):
            # FIXME: raise from numba compiled code will cause leak of string_view (no decref emitted)
            failed, value = string_view_to_int(x, base)
            if failed:
                raise ValueError("invalid string for conversion with int()")
            return value

        return string_view_to_int_impl

    # not a string view: this overload does not apply
    return None
225+
226+
227+
@intrinsic
def string_view_to_float64(typingctx, str_view):
    """Intrinsic wrapping native ``string_view_to_float64``.

    Returns a ``(bool, float64)`` tuple: the native int8 status cast to bool
    and the value the native call wrote through its output pointer.
    """
    ret_type = types.Tuple([types.bool_, types.float64])

    def codegen(context, builder, sig, args):
        view_val, = args
        view_struct = cgutils.create_struct_proxy(sig.args[0])(
            context, builder, value=view_val)

        native_sig = lir.FunctionType(
            lir.IntType(8),
            [lir.IntType(8).as_pointer(), lir.DoubleType().as_pointer()])
        native_fn = cgutils.get_or_insert_function(
            builder.module, native_sig, name="string_view_to_float64")

        # stack slot the native function writes the parsed value into
        out_slot = cgutils.alloca_once(builder, lir.DoubleType())
        native_status = builder.call(native_fn, [view_struct.data_ptr, out_slot])

        status_flag = context.cast(builder, native_status, types.int8, types.bool_)
        parsed = builder.load(out_slot)
        return context.make_tuple(builder, ret_type, [status_flag, parsed])

    return ret_type(str_view), codegen
247+
248+
249+
@overload(float)
def string_view_to_float_ovld(x):
    """Overload of ``float()`` for StdStringViewType.

    The implementation raises ValueError when string_view_to_float64 reports a
    truthy status, otherwise returns the converted value.
    """
    if isinstance(x, StdStringViewType):
        def string_view_to_float_impl(x):
            failed, value = string_view_to_float64(x)
            if failed:
                raise ValueError("invalid string for conversion with float()")
            return value

        return string_view_to_float_impl

    # not a string view: this overload does not apply
    return None
260+
261+
262+
@overload(str)
def string_view_str_ovld(str_view):
    """Overload of ``str()`` for StdStringViewType.

    The implementation passes the view's data pointer and its length to
    decode_utf8 and returns the result.
    """
    if isinstance(str_view, StdStringViewType):
        def string_view_str_impl(str_view):
            raw_ptr = string_view_get_data_ptr(str_view)
            return decode_utf8(raw_ptr, len(str_view))

        return string_view_str_impl

    # not a string view: this overload does not apply
    return None
272+
273+
274+
def install_string_view_delegating_methods(nbtype):
    """Register StdStringViewType method overloads that delegate to ``nbtype``.

    For every method attribute template registered for ``nbtype`` in the CPU
    typing context, source code for an ``overload_method`` on
    StdStringViewType with the same Python signature is generated and
    executed. Each generated implementation converts the view with ``str()``
    and calls the method of the same name on the result, forwarding the
    remaining parameters by keyword.

    Parameters
    ----------
    nbtype : Numba type whose registered methods are mirrored
    """
    # TO-DO: generalize?
    from textwrap import dedent

    from numba.core.registry import CPUDispatcher
    from numba.core import utils

    # need to do refresh, as unicode templates may not be available yet
    typingctx = CPUDispatcher.targetdescr.typing_context
    typingctx.refresh()

    # filter only methods from all attribute templates registered for nbtype
    method_templates = [x for x in typingctx._get_attribute_templates(nbtype)
                        if getattr(x, 'is_method', None)]

    # for all unicode methods register corresponding StringView overload
    # that delegates to it via creating a temporary unicode string
    for this_template in method_templates:
        this_name = this_template._attr

        # signature is computed once and reused for both its text and its params
        pysig = utils.pysignature(this_template._overload_func)
        pysig_str = str(pysig)
        pysig_params = list(pysig.parameters.keys())
        self_param_name = pysig_params[0]
        method_param_names = pysig_params[1:]
        inner_call_params = ', '.join([f'{x}={x}' for x in method_param_names])

        func_name = f'string_view_{this_name}'
        text = dedent(f"""
        @overload_method(StdStringViewType, '{this_name}')
        def {func_name}_ovld{pysig_str}:
            if not isinstance({self_param_name}, StdStringViewType):
                return None
            def _impl{pysig_str}:
                return str({self_param_name}).{this_name}({inner_call_params})
            return _impl
        """)
        # the decorator performs the registration, so the namespaces the
        # generated function lands in are throw-away
        global_vars, local_vars = {'StdStringViewType': StdStringViewType,
                                   'overload_method': overload_method}, {}
        exec(text, global_vars, local_vars)
311+
312+
313+
# Runs at module import: mirror the methods registered for unicode_type
# onto StdStringViewType.
install_string_view_delegating_methods(types.unicode_type)

0 commit comments

Comments
 (0)