diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index f9b7d9c741c79..e5fddcb23c60b 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -836,9 +836,9 @@ def test_to_csv_dups_cols2(self, temp_file): result = result.rename(columns={"a.1": "a"}) tm.assert_frame_equal(result, df) - @pytest.mark.parametrize("chunksize", [10000, 50000, 100000]) + @pytest.mark.parametrize("chunksize", [1, 5, 10]) def test_to_csv_chunking(self, chunksize, temp_file): - aa = DataFrame({"A": range(100000)}) + aa = DataFrame({"A": range(10)}) aa["B"] = aa.A + 1.0 aa["C"] = aa.A + 2.0 aa["D"] = aa.A + 3.0 diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index b44ecdde78d67..274a7a418958f 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -394,8 +394,20 @@ def test_groupby_drop_nan_with_multi_index(): tm.assert_frame_equal(result, expected) -# sequence_index enumerates all strings made up of x, y, z of length 4 -@pytest.mark.parametrize("sequence_index", range(3**4)) +# y > x, and z is the missing value +@pytest.mark.parametrize( +    "sequence", +    [ +        "xyzy", +        "xxyz", +        "yzxz", +        "zzzz", +        "zyzx", +        "yyyy", +        "zzxy", +        "xyxy", +    ], +) @pytest.mark.parametrize( "dtype", [ @@ -419,15 +431,9 @@ def test_groupby_drop_nan_with_multi_index(): ], ) @pytest.mark.parametrize("test_series", [True, False]) -def test_no_sort_keep_na(sequence_index, dtype, test_series, as_index): +def test_no_sort_keep_na(sequence, dtype, test_series, as_index): # GH#46584, GH#48794 - # Convert sequence_index into a string sequence, e.g. 5 becomes "xxyz" - # This sequence is used for the grouper. 
- sequence = "".join( - [{0: "x", 1: "y", 2: "z"}[sequence_index // (3**k) % 3] for k in range(4)] - ) - # Unique values to use for grouper, depends on dtype if dtype in ("string", "string[pyarrow]"): uniques = {"x": "x", "y": "y", "z": pd.NA} diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py index 78a79ac7d1546..b5616cda64115 100644 --- a/pandas/tests/indexes/datetimes/methods/test_tz_localize.py +++ b/pandas/tests/indexes/datetimes/methods/test_tz_localize.py @@ -149,11 +149,11 @@ def test_dti_tz_localize_pass_dates_to_utc(self, tzstr): @pytest.mark.parametrize("prefix", ["", "dateutil/"]) def test_dti_tz_localize(self, prefix): tzstr = prefix + "US/Eastern" - dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms") + dti = date_range(start="1/1/2005", end="1/1/2005 0:00:02.256", freq="ms") dti2 = dti.tz_localize(tzstr) dti_utc = date_range( - start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc" + start="1/1/2005 05:00", end="1/1/2005 5:00:02.256", freq="ms", tz="utc" ) tm.assert_numpy_array_equal(dti2.values, dti_utc.values) @@ -161,11 +161,11 @@ def test_dti_tz_localize(self, prefix): dti3 = dti2.tz_convert(prefix + "US/Pacific") tm.assert_numpy_array_equal(dti3.values, dti_utc.values) - dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms") + dti = date_range(start="11/6/2011 1:59:59", end="11/6/2011 2:00", freq="ms") with pytest.raises(ValueError, match="Cannot infer dst time"): dti.tz_localize(tzstr) - dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms") + dti = date_range(start="3/13/2011 1:59:59", end="3/13/2011 2:00", freq="ms") with pytest.raises(ValueError, match="2011-03-13 02:00:00"): dti.tz_localize(tzstr) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index d5002a47c3447..cf0bc2b872e1a 100644 --- a/pandas/tests/indexing/test_coercion.py +++ 
b/pandas/tests/indexing/test_coercion.py @@ -4,7 +4,6 @@ datetime, timedelta, ) -import itertools import numpy as np import pytest @@ -23,36 +22,6 @@ ############################################################### -@pytest.fixture(autouse=True, scope="class") -def check_comprehensiveness(request): - # Iterate over combination of dtype, method and klass - # and ensure that each are contained within a collected test - cls = request.cls - combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) - - def has_test(combo): - klass, dtype, method = combo - cls_funcs = request.node.session.items - return any( - klass in x.name and dtype in x.name and method in x.name for x in cls_funcs - ) - - opts = request.config.option - if opts.lf or opts.keyword: - # If we are running with "last-failed" or -k foo, we expect to only - # run a subset of tests. - yield - - else: - for combo in combos: - if not has_test(combo): - raise AssertionError( - f"test method is not defined: {cls.__name__}, {combo}" - ) - - yield - - class CoercionBase: klasses = ["index", "series"] dtypes = [ @@ -541,10 +510,6 @@ class TestFillnaSeriesCoercion(CoercionBase): method = "fillna" - @pytest.mark.xfail(reason="Test not implemented") - def test_has_comprehensive_tests(self): - raise NotImplementedError - def _assert_fillna_conversion(self, original, value, expected, expected_dtype): """test coercion triggered by fillna""" target = original.copy() @@ -823,7 +788,7 @@ def replacer(self, how, from_key, to_key): raise ValueError return replacer - def test_replace_series(self, how, to_key, from_key, replacer): + def test_replace_series(self, to_key, from_key, replacer): index = pd.Index([3, 4], name="xxx") obj = pd.Series(self.rep[from_key], index=index, name="yyy") obj = obj.astype(from_key) @@ -860,7 +825,7 @@ def test_replace_series(self, how, to_key, from_key, replacer): "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True ) def test_replace_series_datetime_tz( - self, 
how, to_key, from_key, replacer, using_infer_string + self, to_key, from_key, replacer, using_infer_string ): index = pd.Index([3, 4], name="xyz") obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns") @@ -885,7 +850,7 @@ def test_replace_series_datetime_tz( ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True, ) - def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer): + def test_replace_series_datetime_datetime(self, to_key, from_key, replacer): index = pd.Index([3, 4], name="xyz") obj = pd.Series(self.rep[from_key], index=index, name="yyy").dt.as_unit("ns") assert obj.dtype == from_key diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8e4845a72ec35..de2d914aab229 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1027,6 +1027,7 @@ def test_loc_non_unique(self): tm.assert_frame_equal(result, expected) @pytest.mark.arm_slow + @pytest.mark.slow @pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]]) def test_loc_non_unique_memory_error(self, length, l2): # GH 4280 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 73731cb82dc9b..b44f595e73670 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1340,25 +1340,26 @@ def _convert_categorical(from_frame: DataFrame) -> DataFrame: return from_frame def test_iterator(self, datapath): - fname = datapath("io", "data", "stata", "stata3_117.dta") + fname = datapath("io", "data", "stata", "stata12_117.dta") parsed = read_stata(fname) + expected = parsed.iloc[0:5, :] with read_stata(fname, iterator=True) as itr: chunk = itr.read(5) - tm.assert_frame_equal(parsed.iloc[0:5, :], chunk) + tm.assert_frame_equal(expected, chunk) with read_stata(fname, chunksize=5) as itr: - chunk = list(itr) - tm.assert_frame_equal(parsed.iloc[0:5, :], chunk[0]) + chunk = next(itr) + tm.assert_frame_equal(expected, 
chunk) with read_stata(fname, iterator=True) as itr: chunk = itr.get_chunk(5) - tm.assert_frame_equal(parsed.iloc[0:5, :], chunk) + tm.assert_frame_equal(expected, chunk) with read_stata(fname, chunksize=5) as itr: chunk = itr.get_chunk() - tm.assert_frame_equal(parsed.iloc[0:5, :], chunk) + tm.assert_frame_equal(expected, chunk) # GH12153 with read_stata(fname, chunksize=4) as itr: diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 12128be7c2d30..d3757a71bc35c 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -96,9 +96,9 @@ def test_asfreq_fill_value(index): @pytest.mark.parametrize( "index", [ - timedelta_range("1 day", "10 day", freq="D"), - date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), - period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D"), + timedelta_range("1 day", "3 day", freq="D"), + date_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"), + period_range(datetime(2005, 1, 1), datetime(2005, 1, 3), freq="D"), ], ) def test_resample_interpolate(index): diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index bf49afddbf09b..3ce1094c1d629 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -89,6 +89,7 @@ def test_int64_overflow_groupby_large_range(self): grouped = data.groupby(["a", "b", "c", "d"]) assert len(grouped) == len(values) + @pytest.mark.slow @pytest.mark.parametrize("agg", ["mean", "median"]) def test_int64_overflow_groupby_large_df_shuffled(self, agg): rs = np.random.default_rng(2) diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index f62910b5e1f1c..1cccfe43519cd 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -68,8 +68,8 @@ def test_tz_localize_to_utc_copies(): def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture): tz = tz_aware_fixture - tz_didx = 
date_range("2014-03-01", "2015-01-10", freq="h", tz=tz) - naive_didx = date_range("2014-03-01", "2015-01-10", freq="h") + tz_didx = date_range("2014-03-01", "2014-04-01", freq="h", tz=tz) + naive_didx = date_range("2014-03-01", "2014-04-01", freq="h") _compare_utc_to_local(tz_didx) _compare_local_to_utc(tz_didx, naive_didx) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index fc5ffa24980f5..5d4e2e8ddb234 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -6,7 +6,6 @@ import re from dateutil.parser import parse as du_parse -from hypothesis import given import numpy as np import pytest @@ -30,7 +29,6 @@ option_context, ) import pandas._testing as tm -from pandas._testing._hypothesis import DATETIME_NO_TZ @pytest.mark.skipif(WASM, reason="tzset is not available on WASM") @@ -391,42 +389,6 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs): return msg, result -@pytest.mark.slow -@given(DATETIME_NO_TZ) -@pytest.mark.parametrize("delimiter", list(" -./")) -@pytest.mark.parametrize("dayfirst", [True, False]) -@pytest.mark.parametrize( - "date_format", - ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], -) -def test_hypothesis_delimited_date( - request, date_format, dayfirst, delimiter, test_datetime -): - if date_format == "%m %Y" and delimiter == ".": - request.applymarker( - pytest.mark.xfail( - reason="parse_datetime_string cannot reliably tell whether " - "e.g. 
%m.%Y is a float or a date", - strict=False, - ) - ) - date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) - - except_out_dateutil, result = _helper_hypothesis_delimited_date( - parsing.py_parse_datetime_string, date_string, dayfirst=dayfirst - ) - except_in_dateutil, expected = _helper_hypothesis_delimited_date( - du_parse, - date_string, - default=datetime(1, 1, 1), - dayfirst=dayfirst, - yearfirst=False, - ) - - assert except_out_dateutil == except_in_dateutil - assert result == expected - - @pytest.mark.parametrize("input", ["21-01-01", "01-01-21"]) @pytest.mark.parametrize("dayfirst", [True, False]) def test_parse_datetime_string_with_reso_dayfirst(dayfirst, input):