4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_to_csv.py
@@ -836,9 +836,9 @@ def test_to_csv_dups_cols2(self, temp_file):
result = result.rename(columns={"a.1": "a"})
tm.assert_frame_equal(result, df)

@pytest.mark.parametrize("chunksize", [10000, 50000, 100000])
@pytest.mark.parametrize("chunksize", [1, 5, 10])
def test_to_csv_chunking(self, chunksize, temp_file):
aa = DataFrame({"A": range(100000)})
aa = DataFrame({"A": range(10)})
aa["B"] = aa.A + 1.0
aa["C"] = aa.A + 2.0
aa["D"] = aa.A + 3.0
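Note (not part of the diff): a minimal, hypothetical sketch of the code path this test exercises. DataFrame.to_csv writes the frame in blocks of chunksize rows, so a 10-row frame with chunk sizes of 1/5/10 walks the same chunked-writer logic as the old 100000-row version; the file name and values below are illustrative only.

    import pandas as pd

    df = pd.DataFrame({"A": range(10)})
    df["B"] = df.A + 1.0
    df.to_csv("chunked.csv", chunksize=5)  # written in blocks of 5 rows
    roundtrip = pd.read_csv("chunked.csv", index_col=0)
    print(roundtrip.shape)  # (10, 2), identical content to a single-block write
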
24 changes: 15 additions & 9 deletions pandas/tests/groupby/test_groupby_dropna.py
@@ -394,8 +394,20 @@ def test_groupby_drop_nan_with_multi_index():
tm.assert_frame_equal(result, expected)


- # sequence_index enumerates all strings made up of x, y, z of length 4
- @pytest.mark.parametrize("sequence_index", range(3**4))
+ # y > x and z is the missing value
+ @pytest.mark.parametrize(
+ "sequence",
+ [
+ "xyzy",
+ "xxyz",
+ "yzxz",
+ "zzzz",
+ "zyzx",
+ "yyyy",
+ "zzxy",
+ "xyxy",
+ ],
+ )
@pytest.mark.parametrize(
"dtype",
[
@@ -419,15 +431,9 @@ def test_groupby_drop_nan_with_multi_index():
],
)
@pytest.mark.parametrize("test_series", [True, False])
- def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index):
+ def test_no_sort_keep_na(sequence, dtype, test_series, as_index):
# GH#46584, GH#48794

- # Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz"
- # This sequence is used for the grouper.
- sequence = "".join(
- [{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)]
- )

# Unique values to use for grouper, depends on dtype
if dtype in ("string", "string[pyarrow]"):
uniques = {"x": "x", "y": "y", "z": pd.NA}
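Note (illustration only, not taken from the test file): with sort=False and dropna=False, the NA key forms its own group and groups keep first-appearance order, which is what the sequences above probe.

    import pandas as pd

    df = pd.DataFrame({"key": ["x", "y", pd.NA, "y"], "val": [1, 2, 3, 4]})
    result = df.groupby("key", sort=False, dropna=False)["val"].sum()
    # groups stay in order of first appearance: x -> 1, y -> 6 (2 + 4), NA -> 3
    print(result)
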
8 changes: 4 additions & 4 deletions pandas/tests/indexes/datetimes/methods/test_tz_localize.py
@@ -149,23 +149,23 @@ def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_localize(self, prefix):
tzstr = prefix + "US/Eastern"
dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
dti = date_range(start="1/1/2005", end="1/1/2005 0:00:02.256", freq="ms")
dti2 = dti.tz_localize(tzstr)

dti_utc = date_range(
start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
start="1/1/2005 05:00", end="1/1/2005 5:00:02.256", freq="ms", tz="utc"
)

tm.assert_numpy_array_equal(dti2.values, dti_utc.values)

dti3 = dti2.tz_convert(prefix + "US/Pacific")
tm.assert_numpy_array_equal(dti3.values, dti_utc.values)

dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
dti = date_range(start="11/6/2011 1:59:59", end="11/6/2011 2:00", freq="ms")
with pytest.raises(ValueError, match="Cannot infer dst time"):
dti.tz_localize(tzstr)

dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
dti = date_range(start="3/13/2011 1:59:59", end="3/13/2011 2:00", freq="ms")
with pytest.raises(ValueError, match="2011-03-13 02:00:00"):
dti.tz_localize(tzstr)

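Note (standalone sketch, dates chosen to hit the US spring-forward transition like the test above; not part of the diff): localizing a naive range that spans a nonexistent wall-clock time raises.

    import pandas as pd

    dti = pd.date_range("2011-03-13 01:59:59", "2011-03-13 02:00", freq="ms")
    try:
        dti.tz_localize("US/Eastern")  # 02:00 does not exist on the spring-forward day
    except Exception as err:
        print(type(err).__name__, err)
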
41 changes: 3 additions & 38 deletions pandas/tests/indexing/test_coercion.py
@@ -4,7 +4,6 @@
datetime,
timedelta,
)
- import itertools

import numpy as np
import pytest
@@ -23,36 +22,6 @@
###############################################################


- @pytest.fixture(autouse=True, scope="class")
- def check_comprehensiveness(request):
- # Iterate over combination of dtype, method and klass
- # and ensure that each are contained within a collected test
- cls = request.cls
- combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
-
- def has_test(combo):
- klass, dtype, method = combo
- cls_funcs = request.node.session.items
- return any(
- klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
- )
-
- opts = request.config.option
- if opts.lf or opts.keyword:
- # If we are running with "last-failed" or -k foo, we expect to only
- # run a subset of tests.
- yield
-
- else:
- for combo in combos:
- if not has_test(combo):
- raise AssertionError(
- f"test method is not defined: {cls.__name__}, {combo}"
- )
-
- yield


class CoercionBase:
klasses = ["index", "series"]
dtypes = [
@@ -541,10 +510,6 @@ class TestFillnaSeriesCoercion(CoercionBase):

method = "fillna"

- @pytest.mark.xfail(reason="Test not implemented")
- def test_has_comprehensive_tests(self):
- raise NotImplementedError

def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
"""test coercion triggered by fillna"""
target = original.copy()
@@ -823,7 +788,7 @@ def replacer(self, how, from_key, to_key):
raise ValueError
return replacer

- def test_replace_series(self, how, to_key, from_key, replacer):
+ def test_replace_series(self, to_key, from_key, replacer):
index = pd.Index([3, 4], name="xxx")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
obj = obj.astype(from_key)
@@ -860,7 +825,7 @@ def test_replace_series(self, how, to_key, from_key, replacer):
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
)
def test_replace_series_datetime_tz(
- self, how, to_key, from_key, replacer, using_infer_string
+ self, to_key, from_key, replacer, using_infer_string
):
index = pd.Index([3, 4], name="xyz")
obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
@@ -885,7 +850,7 @@ def test_replace_series_datetime_tz(
["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
indirect=True,
)
- def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
+ def test_replace_series_datetime_datetime(self, to_key, from_key, replacer):
index = pd.Index([3, 4], name="xyz")
obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns")
assert obj.dtype == from_key
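Note: the removed fixture and xfail test only verified that every dtype/method combination had a corresponding test; the coercion behaviour itself is unchanged. A rough, standalone sketch of the kind of dtype coercion the replace tests assert (made-up values):

    import pandas as pd

    ser = pd.Series([1, 2, 3], dtype="int64")
    replaced = ser.replace(2, 2.5)
    print(replaced.dtype)  # float64, the float replacement value forces an upcast
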
1 change: 1 addition & 0 deletions pandas/tests/indexing/test_loc.py
@@ -1027,6 +1027,7 @@ def test_loc_non_unique(self):
tm.assert_frame_equal(result, expected)

@pytest.mark.arm_slow
+ @pytest.mark.slow
@pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]])
def test_loc_non_unique_memory_error(self, length, l2):
# GH 4280
13 changes: 7 additions & 6 deletions pandas/tests/io/test_stata.py
@@ -1340,25 +1340,26 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame:
return from_frame

def test_iterator(self, datapath):
fname = datapath("io", "data", "stata", "stata3_117.dta")
fname = datapath("io", "data", "stata", "stata12_117.dta")

parsed = read_stata(fname)
+ expected = parsed.iloc[0:5, :]

with read_stata(fname, iterator=True) as itr:
chunk = itr.read(5)
- tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+ tm.assert_frame_equal(expected, chunk)

with read_stata(fname, chunksize=5) as itr:
- chunk = list(itr)
- tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0])
+ chunk = next(itr)
+ tm.assert_frame_equal(expected, chunk)

with read_stata(fname, iterator=True) as itr:
chunk = itr.get_chunk(5)
- tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+ tm.assert_frame_equal(expected, chunk)

with read_stata(fname, chunksize=5) as itr:
chunk = itr.get_chunk()
- tm.assert_frame_equal(parsed.iloc[0:5, :], chunk)
+ tm.assert_frame_equal(expected, chunk)

# GH12153
with read_stata(fname, chunksize=4) as itr:
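Note (self-contained sketch of the chunked Stata reader used above; the file written here is a stand-in, not one of the test fixtures):

    import pandas as pd

    df = pd.DataFrame({"x": range(12)})
    df.to_stata("example.dta")  # write a small file so the sketch runs on its own

    full = pd.read_stata("example.dta")
    with pd.read_stata("example.dta", chunksize=5) as reader:
        first = next(reader)  # the reader yields 5-row DataFrames
    print(first.equals(full.iloc[0:5, :]))  # True
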
6 changes: 3 additions & 3 deletions pandas/tests/resample/test_base.py
@@ -96,9 +96,9 @@ def test_asfreq_fill_value(index):
@pytest.mark.parametrize(
"index",
[
timedelta_range("1 day", "10 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"),
timedelta_range("1 day", "3 day", freq="D"),
date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"),
],
)
def test_resample_interpolate(index):
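Note (illustrative only): three daily points are already enough to upsample and interpolate, which is all test_resample_interpolate needs.

    import pandas as pd
    from datetime import datetime

    idx = pd.date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D")
    ser = pd.Series(range(len(idx)), index=idx, dtype="float64")
    upsampled = ser.resample("12h").interpolate()
    print(len(upsampled))  # 5, the two midday points are filled by interpolation
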
1 change: 1 addition & 0 deletions pandas/tests/test_sorting.py
@@ -89,6 +89,7 @@ def test_int64_overflow_groupby_large_range(self):
grouped = data.groupby(["a", "b", "c", "d"])
assert len(grouped) == len(values)

+ @pytest.mark.slow
@pytest.mark.parametrize("agg", ["mean", "median"])
def test_int64_overflow_groupby_large_df_shuffled(self, agg):
rs = np.random.default_rng(2)
4 changes: 2 additions & 2 deletions pandas/tests/tslibs/test_conversion.py
@@ -68,8 +68,8 @@ def test_tz_localize_to_utc_copies():

def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture):
tz = tz_aware_fixture
tz_didx = date_range("2014-03-01", "2015-01-10", freq="h", tz=tz)
naive_didx = date_range("2014-03-01", "2015-01-10", freq="h")
tz_didx = date_range("2014-03-01", "2014-04-01", freq="h", tz=tz)
naive_didx = date_range("2014-03-01", "2014-04-01", freq="h")

_compare_utc_to_local(tz_didx)
_compare_local_to_utc(tz_didx, naive_didx)
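Note (example values, not from the test): the shortened range still contains a DST transition, since US clocks spring forward on 2014-03-09, and conversion round-trips across it.

    import pandas as pd

    tz_didx = pd.date_range("2014-03-01", "2014-04-01", freq="h", tz="US/Eastern")
    in_utc = tz_didx.tz_convert("UTC")
    print(in_utc.tz_convert("US/Eastern").equals(tz_didx))  # True, round-trip preserves the index
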
38 changes: 0 additions & 38 deletions pandas/tests/tslibs/test_parsing.py
@@ -6,7 +6,6 @@
import re

from dateutil.parser import parse as du_parse
from hypothesis import given
import numpy as np
import pytest

@@ -30,7 +29,6 @@
option_context,
)
import pandas._testing as tm
- from pandas._testing._hypothesis import DATETIME_NO_TZ


@pytest.mark.skipif(WASM, reason="tzset is not available on WASM")
@@ -391,42 +389,6 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
return msg, result


- @pytest.mark.slow
- @given(DATETIME_NO_TZ)
- @pytest.mark.parametrize("delimiter", list(" -./"))
- @pytest.mark.parametrize("dayfirst", [True, False])
- @pytest.mark.parametrize(
- "date_format",
- ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"],
- )
- def test_hypothesis_delimited_date(
- request, date_format, dayfirst, delimiter, test_datetime
- ):
- if date_format == "%m %Y" and delimiter == ".":
- request.applymarker(
- pytest.mark.xfail(
- reason="parse_datetime_string cannot reliably tell whether "
- "e.g. %m.%Y is a float or a date",
- strict=False,
- )
- )
- date_string = test_datetime.strftime(date_format.replace(" ", delimiter))
-
- except_out_dateutil, result = _helper_hypothesis_delimited_date(
- parsing.py_parse_datetime_string, date_string, dayfirst=dayfirst
- )
- except_in_dateutil, expected = _helper_hypothesis_delimited_date(
- du_parse,
- date_string,
- default=datetime(1, 1, 1),
- dayfirst=dayfirst,
- yearfirst=False,
- )
-
- assert except_out_dateutil == except_in_dateutil
- assert result == expected


@pytest.mark.parametrize("input", ["21-01-01", "01-01-21"])
@pytest.mark.parametrize("dayfirst", [True, False])
def test_parse_datetime_string_with_reso_dayfirst(dayfirst, input):
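Note: the deleted hypothesis test fuzzed dayfirst handling in the internal parser; the parametrized cases that remain keep fixed-example coverage. A minimal user-facing sketch with made-up dates (assumed outputs shown in comments):

    import pandas as pd

    print(pd.to_datetime("01-02-21", dayfirst=True))   # 2021-02-01, day read first
    print(pd.to_datetime("01-02-21", dayfirst=False))  # 2021-01-02, month read first
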