Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 8049dba

Browse files
Migrating to pyarrow=4.0.1 (#982)
Motivation: keep up with the latest versions of dependencies
1 parent ab14b31 commit 8049dba

File tree

6 files changed

+12
-10
lines changed

6 files changed

+12
-10
lines changed

README.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Building on Linux with setuptools
8585

8686
export PYVER=<3.6 or 3.7>
8787
export NUMPYVER=<1.16 or 1.17>
88-
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0 gcc_linux-64 gxx_linux-64
88+
conda create -n sdc-env -q -y -c intel/label/beta -c defaults -c intel -c conda-forge python=$PYVER numpy=$NUMPYVER tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1 gcc_linux-64 gxx_linux-64
8989
source activate sdc-env
9090
git clone https://github.com/IntelPython/sdc.git
9191
cd sdc
@@ -123,7 +123,7 @@ Building on Windows with setuptools
123123

124124
set PYVER=<3.6 or 3.7>
125125
set NUMPYVER=<1.16 or 1.17>
126-
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.52 pandas=1.2.0 pyarrow=2.0.0
126+
conda create -n sdc-env -c intel/label/beta -c defaults -c intel -c conda-forge python=%PYVER% numpy=%NUMPYVER% tbb-devel tbb4py numba=0.53.1 pandas=1.2.0 pyarrow=4.0.1
127127
conda activate sdc-env
128128
set INCLUDE=%INCLUDE%;%CONDA_PREFIX%\Library\include
129129
set LIB=%LIB%;%CONDA_PREFIX%\Library\lib

conda-recipe/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{% set NUMBA_VERSION = "==0.53.1" %}
22
{% set PANDAS_VERSION = "==1.2.0" %}
3-
{% set PYARROW_VERSION = "==2.0.0" %}
3+
{% set PYARROW_VERSION = "==4.0.1" %}
44

55
package:
66
name: sdc

docs/source/getting_started.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ Distribution includes Intel SDC for Python 3.6 and 3.7 for Windows and Linux pla
4141
Intel SDC conda package can be installed using the steps below:
4242
::
4343

44-
> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge
44+
> conda create -n sdc_env python=<3.7 or 3.6> pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge
4545
> conda activate sdc_env
4646
> conda install sdc -c intel/label/beta -c intel -c defaults -c conda-forge --override-channels
4747

4848
Intel SDC wheel package can be installed using the steps below:
4949
::
5050

51-
> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=2.0.0 pandas=1.2.0 -c anaconda -c conda-forge
51+
> conda create -n sdc_env python=<3.7 or 3.6> pip pyarrow=4.0.1 pandas=1.2.0 -c anaconda -c conda-forge
5252
> conda activate sdc_env
5353
> pip install --index-url https://pypi.anaconda.org/intel/label/beta/simple --extra-index-url https://pypi.anaconda.org/intel/simple --extra-index-url https://pypi.org/simple sdc
5454

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
numpy>=1.16
22
pandas==1.2.0
3-
pyarrow==2.0.0
3+
pyarrow==4.0.1
44
numba==0.53.1
55
tbb
66
tbb-devel

sdc/io/csv_ext.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -470,9 +470,11 @@ def pandas_read_csv(
470470
try:
471471
for column in parse_dates:
472472
name = f"f{column}"
473-
# TODO: Try to help pyarrow infer date type - set DateType.
474-
# dtype[name] = pyarrow.from_numpy_dtype(np.datetime64) # string
475-
del column_types[name]
473+
# starting from pyarrow=3.0.0 strings are parsed to DateType (converted back to 'object'
474+
# when using to_pandas), but not TimestampType (that is used to represent np.datetime64)
475+
# see: pyarrow.from_numpy_dtype(np.datetime64('NaT', 's'))
476+
# so set the needed type explicitly instead of relying on pyarrow's type inference
477+
column_types[name] = pyarrow.timestamp('s')
476478
except: pass
477479

478480
parse_options = pyarrow.csv.ParseOptions(

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def run(self):
404404
install_requires=[
405405
'numpy>=1.16',
406406
'pandas==1.2.0',
407-
'pyarrow==2.0.0',
407+
'pyarrow==4.0.1',
408408
'numba==0.53.1',
409409
'tbb'
410410
],

0 commit comments

Comments
 (0)