-
Notifications
You must be signed in to change notification settings
Fork 233
pyarrow: Check compatibility of pyarrow.array with string type #2933
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 18 commits
1f32a7c
4c4e064
07fbca6
d379e46
cfda386
0a6cda5
f59f93c
757da24
17c1e9c
0105d64
3ad0c86
4bea288
371174a
b588730
faf2065
b2efbb4
9fd77dc
ccf4eff
44d01ed
a927202
7b00248
7dc353b
ce76152
ef431af
acaf350
6ad6eb9
d88accd
265132e
edb3438
8172102
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| import pandas as pd | ||
| import xarray as xr | ||
| from packaging.version import Version | ||
| from pygmt._typing import StringArrayTypes | ||
| from pygmt.exceptions import GMTInvalidInput | ||
|
|
||
|
|
||
|
|
@@ -273,14 +274,15 @@ def sequence_to_ctypes_array( | |
| return (ctype * size)(*sequence) | ||
|
|
||
|
|
||
| def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array: | ||
| def strings_to_ctypes_array(strings: StringArrayTypes) -> ctp.Array: | ||
| """ | ||
| Convert a sequence (e.g., a list) of strings into a ctypes array. | ||
| Convert a sequence (e.g., a list) or numpy.ndarray of strings or a | ||
| pyarrow.StringArray into a ctypes array. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| strings | ||
| A sequence of strings. | ||
| A sequence of strings, a numpy.ndarray of str dtype, or a pyarrow.StringArray. | ||
|
|
||
| Returns | ||
| ------- | ||
|
|
@@ -296,7 +298,7 @@ def strings_to_ctypes_array(strings: Sequence[str] | np.ndarray) -> ctp.Array: | |
| >>> [s.decode() for s in ctypes_array] | ||
| ['first', 'second', 'third'] | ||
| """ | ||
| return (ctp.c_char_p * len(strings))(*[s.encode() for s in strings]) | ||
| return (ctp.c_char_p * len(strings))(*[s.encode() for s in np.asarray(strings)]) | ||
|
||
|
|
||
|
|
||
| def array_to_datetime(array: Sequence[Any] | np.ndarray) -> np.ndarray: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,12 +8,31 @@ | |
| from pygmt import clib | ||
| from pygmt.exceptions import GMTCLibError | ||
| from pygmt.helpers import GMTTempFile | ||
| from pygmt.helpers.testing import skip_if_no | ||
|
||
|
|
||
| try: | ||
| import pyarrow as pa | ||
| except ImportError: | ||
| pa = None | ||
|
|
||
|
|
||
| @pytest.mark.benchmark | ||
| def test_put_strings(): | ||
| @pytest.mark.parametrize( | ||
| ("array_func", "dtype"), | ||
| [ | ||
| pytest.param(np.array, {"dtype": np.str_}, id="str"), | ||
| pytest.param( | ||
| getattr(pa, "array", None), | ||
| {"type": "string"}, # pa.string() | ||
| marks=skip_if_no(package="pyarrow"), | ||
| id="pyarrow", | ||
| ), | ||
| ], | ||
| ) | ||
| def test_put_strings(array_func, dtype): | ||
| """ | ||
| Check that assigning a numpy array of dtype str to a dataset works. | ||
| Check that assigning a numpy array of dtype str, or a pyarrow.StringArray to a | ||
| dataset works. | ||
| """ | ||
| with clib.Session() as lib: | ||
| dataset = lib.create_data( | ||
|
|
@@ -24,7 +43,7 @@ def test_put_strings(): | |
| ) | ||
| x = np.array([1, 2, 3, 4, 5], dtype=np.int32) | ||
| y = np.array([6, 7, 8, 9, 10], dtype=np.int32) | ||
| strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=np.str_) | ||
| strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) | ||
| lib.put_vector(dataset, column=lib["GMT_X"], vector=x) | ||
| lib.put_vector(dataset, column=lib["GMT_Y"], vector=y) | ||
| lib.put_strings( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,8 +10,14 @@ | |
| from pygmt.clib.session import DTYPES_NUMERIC | ||
| from pygmt.exceptions import GMTCLibError, GMTInvalidInput | ||
| from pygmt.helpers import GMTTempFile | ||
| from pygmt.helpers.testing import skip_if_no | ||
| from pygmt.tests.test_clib import mock | ||
|
|
||
| try: | ||
| import pyarrow as pa | ||
| except ImportError: | ||
| pa = None | ||
|
|
||
| POINTS_DATA = Path(__file__).parent / "data" / "points.txt" | ||
|
|
||
|
|
||
|
|
@@ -137,3 +143,37 @@ def test_open_virtual_file(): | |
| bounds = "\t".join([f"<{col.min():.0f}/{col.max():.0f}>" for col in data.T]) | ||
| expected = f"<matrix memory>: N = {shape[0]}\t{bounds}\n" | ||
| assert output == expected | ||
|
|
||
|
|
||
| @pytest.mark.benchmark | ||
|
||
| @pytest.mark.parametrize( | ||
| ("array_func", "dtype"), | ||
| [ | ||
| pytest.param(np.array, {"dtype": np.str_}, id="str"), | ||
| pytest.param(np.array, {"dtype": np.object_}, id="object"), | ||
| pytest.param( | ||
| getattr(pa, "array", None), | ||
| {"type": "string"}, # pa.string() | ||
| marks=skip_if_no(package="pyarrow"), | ||
| id="pyarrow", | ||
| ), | ||
| ], | ||
| ) | ||
| def test_virtualfile_from_vectors_one_string_or_object_column(array_func, dtype): | ||
| """ | ||
| Test passing in one column with string (numpy/pyarrow) or object (numpy) | ||
| dtype into virtual file dataset. | ||
| """ | ||
| size = 5 | ||
| x = np.arange(size, dtype=np.int32) | ||
| y = np.arange(size, size * 2, 1, dtype=np.int32) | ||
| strings = array_func(["a", "bc", "defg", "hijklmn", "opqrst"], **dtype) | ||
| with clib.Session() as lib: | ||
| with lib.virtualfile_from_vectors(x, y, strings) as vfile: | ||
| with GMTTempFile() as outfile: | ||
| lib.call_module("convert", [vfile, f"->{outfile.name}"]) | ||
| output = outfile.read(keep_tabs=True) | ||
| expected = "".join( | ||
| f"{i}\t{j}\t{k}\n" for i, j, k in zip(x, y, strings, strict=True) | ||
| ) | ||
| assert output == expected | ||
Uh oh!
There was an error while loading. Please reload this page.