From 9ebb87b7c66e9f81369c9f63cd930138eee55195 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Sun, 8 Sep 2024 21:36:46 -0400 Subject: [PATCH 01/13] Added document and a test case for newlines_in_values case. --- pandas/io/parsers/readers.py | 21 +++++++++++++++++++-- pandas/tests/io/parser/test_unsupported.py | 16 ++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 2916e4d98cce4..453c4e46ff26d 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -247,7 +247,11 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): .. versionadded:: 1.4.0 The 'pyarrow' engine was added as an *experimental* engine, and some features - are unsupported, or may not work correctly, with this engine. + are unsupported, or may not work correctly, with this engine. For example, + the newlines_in_values in the ParseOptions of the pyarrow allows handling the + newline characters within values when parsing csv files. However, this is not + currently supported by Pandas. In this case, the 'csv' module in the pyarrow + should be used instead. For more information, refer to the example. converters : dict of {{Hashable : Callable}}, optional Functions for converting values in specified columns. Keys can either be column labels or column indices. @@ -545,12 +549,25 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): ... parse_dates=[1, 2], ... date_format={{'col 2': '%d/%m/%Y', 'col 3': '%a %d %b %Y'}}, ... ) # doctest: +SKIP - >>> df.dtypes # doctest: +SKIP col 1 int64 col 2 datetime64[ns] col 3 datetime64[ns] dtype: object + +The csv in pyarrow must be used if values have new line character. 
+ +>>> from pyarrow import csv +>>> parse_options = csv.ParseOptions(newlines_in_values=True) +>>> table = csv.read_csv("./example.csv", parse_options=parse_options) +>>> df = table.to_pandas() +>>> df.head() + text idx +0 ab\ncd 0 +1 ab\ncd 1 +2 ab\ncd 2 +3 ab\ncd 3 +4 ab\ncd 4 """ # noqa: E501 diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 07f84466e3ac2..134a97518e627 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -19,6 +19,7 @@ from pandas.io.parsers import read_csv import pandas.io.parsers.readers as parsers +from pandas.core.frame import DataFrame pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" @@ -150,6 +151,21 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) + def test_pyarrow_newlines_in_values(self): + msg = ( + "CSV parser got out of sync with chunker. " + "This can mean the data file contains cell values spanning multiple lines; " + "please consider enabling the option 'newlines_in_values'." + ) + rows = [] + for idx in range(1_000_000): + rows.append({"text": "ab\ncd", "idx" : idx}) + df = DataFrame(rows) + df.to_csv("test.csv", index=False) + with pytest.raises(ValueError, match=msg): + read_csv("test.csv", engine="pyarrow") + os.unlink("test.csv") + def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers): # GH 5686 # GH 54643 From ccd14a84938ecdbb47338580dc12c62ebd64be81 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Sun, 8 Sep 2024 22:16:31 -0400 Subject: [PATCH 02/13] removed trailing spaces and changed test codes. 
--- pandas/io/parsers/readers.py | 4 ++-- pandas/tests/io/parser/test_unsupported.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 453c4e46ff26d..3f5f0f29f2983 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -248,9 +248,9 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): The 'pyarrow' engine was added as an *experimental* engine, and some features are unsupported, or may not work correctly, with this engine. For example, - the newlines_in_values in the ParseOptions of the pyarrow allows handling the + the newlines_in_values in the ParseOptions of the pyarrow allows handling the newline characters within values when parsing csv files. However, this is not - currently supported by Pandas. In this case, the 'csv' module in the pyarrow + currently supported by Pandas. In this case, the 'csv' module in the pyarrow should be used instead. For more information, refer to the example. converters : dict of {{Hashable : Callable}}, optional Functions for converting values in specified columns. Keys can either diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 134a97518e627..7afd78a5f2cf5 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -153,13 +153,11 @@ def test_pyarrow_engine(self): def test_pyarrow_newlines_in_values(self): msg = ( - "CSV parser got out of sync with chunker. " - "This can mean the data file contains cell values spanning multiple lines; " - "please consider enabling the option 'newlines_in_values'." + "CSV parser got out of sync with chunker. " + "This can mean the data file contains cell values spanning multiple " + "lines; please consider enabling the option 'newlines_in_values'." 
) - rows = [] - for idx in range(1_000_000): - rows.append({"text": "ab\ncd", "idx" : idx}) + rows = [{"text": "ab\ncd", "idx" : idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) with pytest.raises(ValueError, match=msg): From 898365eb3fb9340f278cffbaaa28104cac7ee30f Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Sun, 8 Sep 2024 22:39:51 -0400 Subject: [PATCH 03/13] fixed hook error --- pandas/tests/io/parser/test_unsupported.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 7afd78a5f2cf5..71909dfa20bbd 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -16,10 +16,10 @@ from pandas.errors import ParserError import pandas._testing as tm +from pandas.core.frame import DataFrame from pandas.io.parsers import read_csv import pandas.io.parsers.readers as parsers -from pandas.core.frame import DataFrame pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" @@ -156,8 +156,8 @@ def test_pyarrow_newlines_in_values(self): "CSV parser got out of sync with chunker. " "This can mean the data file contains cell values spanning multiple " "lines; please consider enabling the option 'newlines_in_values'." - ) - rows = [{"text": "ab\ncd", "idx" : idx} for idx in range(1_000_000)] + ) + rows = [{"text": "ab\ncd", "idx": idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) with pytest.raises(ValueError, match=msg): From 7fdbf435e20271a7f3826aa7a26ba2366118504e Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Tue, 10 Sep 2024 21:53:43 -0400 Subject: [PATCH 04/13] fixed unit-test failure. 
--- pandas/io/parsers/readers.py | 5 +++-- pandas/tests/io/parser/test_unsupported.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 3f5f0f29f2983..7a8a8d8c72ec0 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -555,11 +555,12 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): col 3 datetime64[ns] dtype: object -The csv in pyarrow must be used if values have new line character. +The csv in the pyarrow must be used if the values in the file have +new line characters. >>> from pyarrow import csv >>> parse_options = csv.ParseOptions(newlines_in_values=True) ->>> table = csv.read_csv("./example.csv", parse_options=parse_options) +>>> table = csv.read_csv("example.csv", parse_options=parse_options) >>> df = table.to_pandas() >>> df.head() text idx diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 71909dfa20bbd..f44ef04a7fc22 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -152,6 +152,7 @@ def test_pyarrow_engine(self): read_csv(StringIO(data), engine="pyarrow", **kwargs) def test_pyarrow_newlines_in_values(self): + pytest.importorskip("pyarrow") msg = ( "CSV parser got out of sync with chunker. " "This can mean the data file contains cell values spanning multiple " From 25c2604122212acb6caa0ab4f75459363fc155ba Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Sun, 8 Sep 2024 21:36:46 -0400 Subject: [PATCH 05/13] Added document and a test case for newlines_in_values case. 
--- pandas/io/parsers/readers.py | 21 +++++++++++++++++++-- pandas/tests/io/parser/test_unsupported.py | 16 ++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index ffc2690a5efdf..039053d702d4e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -247,7 +247,11 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): .. versionadded:: 1.4.0 The 'pyarrow' engine was added as an *experimental* engine, and some features - are unsupported, or may not work correctly, with this engine. + are unsupported, or may not work correctly, with this engine. For example, + the newlines_in_values in the ParseOptions of the pyarrow allows handling the + newline characters within values when parsing csv files. However, this is not + currently supported by Pandas. In this case, the 'csv' module in the pyarrow + should be used instead. For more information, refer to the example. converters : dict of {{Hashable : Callable}}, optional Functions for converting values in specified columns. Keys can either be column labels or column indices. @@ -545,12 +549,25 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): ... parse_dates=[1, 2], ... date_format={{'col 2': '%d/%m/%Y', 'col 3': '%a %d %b %Y'}}, ... ) # doctest: +SKIP - >>> df.dtypes # doctest: +SKIP col 1 int64 col 2 datetime64[ns] col 3 datetime64[ns] dtype: object + +The csv in pyarrow must be used if values have new line character. 
+ +>>> from pyarrow import csv +>>> parse_options = csv.ParseOptions(newlines_in_values=True) +>>> table = csv.read_csv("./example.csv", parse_options=parse_options) +>>> df = table.to_pandas() +>>> df.head() + text idx +0 ab\ncd 0 +1 ab\ncd 1 +2 ab\ncd 2 +3 ab\ncd 3 +4 ab\ncd 4 """ # noqa: E501 diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 07f84466e3ac2..134a97518e627 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -19,6 +19,7 @@ from pandas.io.parsers import read_csv import pandas.io.parsers.readers as parsers +from pandas.core.frame import DataFrame pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" @@ -150,6 +151,21 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) + def test_pyarrow_newlines_in_values(self): + msg = ( + "CSV parser got out of sync with chunker. " + "This can mean the data file contains cell values spanning multiple lines; " + "please consider enabling the option 'newlines_in_values'." 
+ ) + rows = [] + for idx in range(1_000_000): + rows.append({"text": "ab\ncd", "idx" : idx}) + df = DataFrame(rows) + df.to_csv("test.csv", index=False) + with pytest.raises(ValueError, match=msg): + read_csv("test.csv", engine="pyarrow") + os.unlink("test.csv") + def test_on_bad_lines_callable_python_or_pyarrow(self, all_parsers): # GH 5686 # GH 54643 From 703654fd86fd5a116bb601433d0a3df9b97ccc40 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Tue, 24 Sep 2024 22:11:20 -0400 Subject: [PATCH 06/13] fixed unit test --- pandas/tests/io/parser/test_unsupported.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index f44ef04a7fc22..29607251b02aa 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -152,7 +152,6 @@ def test_pyarrow_engine(self): read_csv(StringIO(data), engine="pyarrow", **kwargs) def test_pyarrow_newlines_in_values(self): - pytest.importorskip("pyarrow") msg = ( "CSV parser got out of sync with chunker. " "This can mean the data file contains cell values spanning multiple " @@ -161,7 +160,7 @@ def test_pyarrow_newlines_in_values(self): rows = [{"text": "ab\ncd", "idx": idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) - with pytest.raises(ValueError, match=msg): + with pytest.raises(ParserError, match=msg): read_csv("test.csv", engine="pyarrow") os.unlink("test.csv") From ca34b249ddb6b24ae9faa7a29ad08c3e08a50e45 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Tue, 24 Sep 2024 22:48:45 -0400 Subject: [PATCH 07/13] skip test if pyarrow cannot be imported. 
--- pandas/tests/io/parser/test_unsupported.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 29607251b02aa..36075cc271ced 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -152,6 +152,7 @@ def test_pyarrow_engine(self): read_csv(StringIO(data), engine="pyarrow", **kwargs) def test_pyarrow_newlines_in_values(self): + pytest.importorskip("pyarrow") msg = ( "CSV parser got out of sync with chunker. " "This can mean the data file contains cell values spanning multiple " From c4222ad8902b80e11d961725b1fc0a24ae931e0f Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Thu, 26 Sep 2024 21:24:13 -0400 Subject: [PATCH 08/13] Added pyarrow version check --- pandas/tests/io/parser/test_unsupported.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 36075cc271ced..984a7ae4eb761 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,6 +13,7 @@ import pytest +from pandas.compat.pyarrow import pa_version_under11p0 from pandas.errors import ParserError import pandas._testing as tm @@ -151,6 +152,7 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) + @pytest.mark.skipif(pa_version_under11p0, reason="not raise ParseError") def test_pyarrow_newlines_in_values(self): pytest.importorskip("pyarrow") msg = ( From ff7dcba2fb336bb1657bbc96f3b5bdb9d77f8447 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Thu, 26 Sep 2024 21:42:37 -0400 Subject: [PATCH 09/13] changed version --- pandas/tests/io/parser/test_unsupported.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 984a7ae4eb761..213806c3bb2e1 
100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,7 +13,7 @@ import pytest -from pandas.compat.pyarrow import pa_version_under11p0 +from pandas.compat.pyarrow import pa_version_under15p0 from pandas.errors import ParserError import pandas._testing as tm @@ -152,7 +152,7 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) - @pytest.mark.skipif(pa_version_under11p0, reason="not raise ParseError") + @pytest.mark.skipif(pa_version_under15p0, reason="not raise ParseError") def test_pyarrow_newlines_in_values(self): pytest.importorskip("pyarrow") msg = ( From fb87e78fbb4403f2f65f92ae0a805752ddecd00a Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Thu, 26 Sep 2024 22:04:09 -0400 Subject: [PATCH 10/13] pa_version_13p0 --- pandas/tests/io/parser/test_unsupported.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 213806c3bb2e1..bb97849cb2d95 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,7 +13,7 @@ import pytest -from pandas.compat.pyarrow import pa_version_under15p0 +from pandas.compat.pyarrow import pa_version_under13p0 from pandas.errors import ParserError import pandas._testing as tm @@ -152,7 +152,7 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) - @pytest.mark.skipif(pa_version_under15p0, reason="not raise ParseError") + @pytest.mark.skipif(pa_version_under13p0, reason="not raise ParseError") def test_pyarrow_newlines_in_values(self): pytest.importorskip("pyarrow") msg = ( From 84d8b6d53060de6873aeea0993b789afa4f0e56e Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Fri, 27 Sep 2024 20:42:45 -0400 Subject: [PATCH 11/13] debugging test --- 
pandas/tests/io/parser/test_unsupported.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index bb97849cb2d95..11aec8a97138f 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,7 +13,14 @@ import pytest -from pandas.compat.pyarrow import pa_version_under13p0 +from pandas.compat.pyarrow import ( + pa_version_under11p0, + pa_version_under13p0, + pa_version_under14p0, + pa_version_under15p0, + pa_version_under17p0, + pa_version_under18p0, +) from pandas.errors import ParserError import pandas._testing as tm @@ -163,6 +170,10 @@ def test_pyarrow_newlines_in_values(self): rows = [{"text": "ab\ncd", "idx": idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) + print(f"11={pa_version_under11p0}, 13={pa_version_under13p0}") + print(f"14={pa_version_under14p0}, 15={pa_version_under15p0}") + print(f"17={pa_version_under17p0}, 18={pa_version_under18p0}") + with pytest.raises(ParserError, match=msg): read_csv("test.csv", engine="pyarrow") os.unlink("test.csv") From 18618c77ce1c738b0efd0b6e4e5751ddb3551339 Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Fri, 27 Sep 2024 21:06:38 -0400 Subject: [PATCH 12/13] roll-back of compat --- pandas/tests/io/parser/test_unsupported.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 11aec8a97138f..b05bfa1299428 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,14 +13,6 @@ import pytest -from pandas.compat.pyarrow import ( - pa_version_under11p0, - pa_version_under13p0, - pa_version_under14p0, - pa_version_under15p0, - pa_version_under17p0, - pa_version_under18p0, -) from pandas.errors import ParserError import pandas._testing as tm @@ 
-159,7 +151,6 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) - @pytest.mark.skipif(pa_version_under13p0, reason="not raise ParseError") def test_pyarrow_newlines_in_values(self): pytest.importorskip("pyarrow") msg = ( @@ -170,9 +161,9 @@ def test_pyarrow_newlines_in_values(self): rows = [{"text": "ab\ncd", "idx": idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) - print(f"11={pa_version_under11p0}, 13={pa_version_under13p0}") - print(f"14={pa_version_under14p0}, 15={pa_version_under15p0}") - print(f"17={pa_version_under17p0}, 18={pa_version_under18p0}") + # print(f"11={pa_version_under11p0}, 13={pa_version_under13p0}") + # print(f"14={pa_version_under14p0}, 15={pa_version_under15p0}") + # print(f"17={pa_version_under17p0}, 18={pa_version_under18p0}") with pytest.raises(ParserError, match=msg): read_csv("test.csv", engine="pyarrow") From fe946a20b910a2dea00070ea576ac829bd6207cc Mon Sep 17 00:00:00 2001 From: Wooseog Choi Date: Fri, 27 Sep 2024 21:35:12 -0400 Subject: [PATCH 13/13] run unit test for pyarrow v. 
18 --- pandas/tests/io/parser/test_unsupported.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index b05bfa1299428..94f71bf38ec43 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -13,6 +13,7 @@ import pytest +from pandas.compat.pyarrow import pa_version_under18p0 from pandas.errors import ParserError import pandas._testing as tm @@ -151,6 +152,7 @@ def test_pyarrow_engine(self): with pytest.raises(ValueError, match=msg): read_csv(StringIO(data), engine="pyarrow", **kwargs) + @pytest.mark.skipif(not pa_version_under18p0, reason="No ParserError raised") def test_pyarrow_newlines_in_values(self): pytest.importorskip("pyarrow") msg = ( @@ -161,9 +163,6 @@ def test_pyarrow_newlines_in_values(self): rows = [{"text": "ab\ncd", "idx": idx} for idx in range(1_000_000)] df = DataFrame(rows) df.to_csv("test.csv", index=False) - # print(f"11={pa_version_under11p0}, 13={pa_version_under13p0}") - # print(f"14={pa_version_under14p0}, 15={pa_version_under15p0}") - # print(f"17={pa_version_under17p0}, 18={pa_version_under18p0}") with pytest.raises(ParserError, match=msg): read_csv("test.csv", engine="pyarrow")