@@ -90,7 +90,12 @@ def test_read_empty_dta_with_dtypes(self, version):
9090 "f64" : np .array ([0 ], dtype = np .float64 ),
9191 }
9292 )
93- expected = empty_df_typed .copy ()
93+ # GH 7369: make sure a 0-obs dta file can be read
94+ with tm .ensure_clean () as path :
95+ empty_df_typed .to_stata (path , write_index = False , version = version )
96+ empty_reread = read_stata (path )
97+
98+ expected = empty_df_typed
9499 # No uint# support. Downcast since values in range for int#
95100 expected ["u8" ] = expected ["u8" ].astype (np .int8 )
96101 expected ["u16" ] = expected ["u16" ].astype (np .int16 )
@@ -99,12 +104,8 @@ def test_read_empty_dta_with_dtypes(self, version):
99104 expected ["u64" ] = expected ["u64" ].astype (np .int32 )
100105 expected ["i64" ] = expected ["i64" ].astype (np .int32 )
101106
102- # GH 7369, make sure can read a 0-obs dta file
103- with tm .ensure_clean () as path :
104- empty_df_typed .to_stata (path , write_index = False , version = version )
105- empty_reread = read_stata (path )
106- tm .assert_frame_equal (expected , empty_reread )
107- tm .assert_series_equal (expected .dtypes , empty_reread .dtypes )
107+ tm .assert_frame_equal (expected , empty_reread )
108+ tm .assert_series_equal (expected .dtypes , empty_reread .dtypes )
108109
109110 @pytest .mark .parametrize ("version" , [114 , 117 , 118 , 119 , None ])
110111 def test_read_index_col_none (self , version ):
@@ -115,7 +116,7 @@ def test_read_index_col_none(self, version):
115116 read_df = read_stata (path )
116117
117118 assert isinstance (read_df .index , pd .RangeIndex )
118- expected = df . copy ()
119+ expected = df
119120 expected ["a" ] = expected ["a" ].astype (np .int32 )
120121 tm .assert_frame_equal (read_df , expected , check_index_type = True )
121122
@@ -325,7 +326,7 @@ def test_read_write_dta5(self):
325326 original .to_stata (path , convert_dates = None )
326327 written_and_read_again = self .read_dta (path )
327328
328- expected = original . copy ()
329+ expected = original
329330 expected .index = expected .index .astype (np .int32 )
330331 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
331332
@@ -424,7 +425,7 @@ def test_read_write_dta11(self):
424425
425426 written_and_read_again = self .read_dta (path )
426427
427- expected = formatted . copy ()
428+ expected = formatted
428429 expected .index = expected .index .astype (np .int32 )
429430 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
430431
@@ -462,7 +463,7 @@ def test_read_write_dta12(self, version):
462463
463464 written_and_read_again = self .read_dta (path )
464465
465- expected = formatted . copy ()
466+ expected = formatted
466467 expected .index = expected .index .astype (np .int32 )
467468 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
468469
@@ -480,7 +481,7 @@ def test_read_write_dta13(self):
480481 original .to_stata (path )
481482 written_and_read_again = self .read_dta (path )
482483
483- expected = formatted . copy ()
484+ expected = formatted
484485 expected .index = expected .index .astype (np .int32 )
485486 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
486487
@@ -561,7 +562,7 @@ def test_numeric_column_names(self):
561562 convert_col_name = lambda x : int (x [1 ])
562563 written_and_read_again .columns = map (convert_col_name , columns )
563564
564- expected = original . copy ()
565+ expected = original
565566 expected .index = expected .index .astype (np .int32 )
566567 tm .assert_frame_equal (expected , written_and_read_again )
567568
@@ -579,7 +580,7 @@ def test_nan_to_missing_value(self, version):
579580 written_and_read_again = self .read_dta (path )
580581
581582 written_and_read_again = written_and_read_again .set_index ("index" )
582- expected = original . copy ()
583+ expected = original
583584 expected .index = expected .index .astype (np .int32 )
584585 tm .assert_frame_equal (written_and_read_again , expected )
585586
@@ -602,7 +603,7 @@ def test_string_no_dates(self):
602603 original .to_stata (path )
603604 written_and_read_again = self .read_dta (path )
604605
605- expected = original . copy ()
606+ expected = original
606607 expected .index = expected .index .astype (np .int32 )
607608 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), expected )
608609
@@ -619,7 +620,7 @@ def test_large_value_conversion(self):
619620
620621 written_and_read_again = self .read_dta (path )
621622
622- modified = original . copy ()
623+ modified = original
623624 modified ["s1" ] = Series (modified ["s1" ], dtype = np .int16 )
624625 modified ["s2" ] = Series (modified ["s2" ], dtype = np .int32 )
625626 modified ["s3" ] = Series (modified ["s3" ], dtype = np .float64 )
@@ -635,7 +636,7 @@ def test_dates_invalid_column(self):
635636
636637 written_and_read_again = self .read_dta (path )
637638
638- modified = original . copy ()
639+ modified = original
639640 modified .columns = ["_0" ]
640641 modified .index = original .index .astype (np .int32 )
641642 tm .assert_frame_equal (written_and_read_again .set_index ("index" ), modified )
@@ -721,8 +722,15 @@ def test_bool_uint(self, byteorder, version):
721722 {"s0" : s0 , "s1" : s1 , "s2" : s2 , "s3" : s3 , "s4" : s4 , "s5" : s5 , "s6" : s6 }
722723 )
723724 original .index .name = "index"
724- expected = original .copy ()
725- expected .index = original .index .astype (np .int32 )
725+
726+ with tm .ensure_clean () as path :
727+ original .to_stata (path , byteorder = byteorder , version = version )
728+ written_and_read_again = self .read_dta (path )
729+
730+ written_and_read_again = written_and_read_again .set_index ("index" )
731+
732+ expected = original
733+ expected .index = expected .index .astype (np .int32 )
726734 expected_types = (
727735 np .int8 ,
728736 np .int8 ,
@@ -735,11 +743,6 @@ def test_bool_uint(self, byteorder, version):
735743 for c , t in zip (expected .columns , expected_types ):
736744 expected [c ] = expected [c ].astype (t )
737745
738- with tm .ensure_clean () as path :
739- original .to_stata (path , byteorder = byteorder , version = version )
740- written_and_read_again = self .read_dta (path )
741-
742- written_and_read_again = written_and_read_again .set_index ("index" )
743746 tm .assert_frame_equal (written_and_read_again , expected )
744747
745748 def test_variable_labels (self , datapath ):
@@ -1000,18 +1003,19 @@ def test_categorical_writing(self, version):
10001003 "unlabeled" ,
10011004 ],
10021005 )
1003- expected = original .copy ()
1006+ with tm .ensure_clean () as path :
1007+ original .astype ("category" ).to_stata (path , version = version )
1008+ written_and_read_again = self .read_dta (path )
10041009
1005- # these are all categoricals
1006- original = pd .concat (
1007- [original [col ].astype ("category" ) for col in original ], axis = 1
1008- )
1010+ res = written_and_read_again .set_index ("index" )
1011+
1012+ expected = original
10091013 expected .index = expected .index .set_names ("index" ).astype (np .int32 )
10101014
10111015 expected ["incompletely_labeled" ] = expected ["incompletely_labeled" ].apply (str )
10121016 expected ["unlabeled" ] = expected ["unlabeled" ].apply (str )
10131017 for col in expected :
1014- orig = expected [col ]. copy ()
1018+ orig = expected [col ]
10151019
10161020 cat = orig .astype ("category" )._values
10171021 cat = cat .as_ordered ()
@@ -1022,11 +1026,6 @@ def test_categorical_writing(self, version):
10221026
10231027 expected [col ] = cat
10241028
1025- with tm .ensure_clean () as path :
1026- original .to_stata (path , version = version )
1027- written_and_read_again = self .read_dta (path )
1028-
1029- res = written_and_read_again .set_index ("index" )
10301029 tm .assert_frame_equal (res , expected )
10311030
10321031 def test_categorical_warnings_and_errors (self ):
@@ -1037,9 +1036,7 @@ def test_categorical_warnings_and_errors(self):
10371036 columns = ["Too_long" ],
10381037 )
10391038
1040- original = pd .concat (
1041- [original [col ].astype ("category" ) for col in original ], axis = 1
1042- )
1039+ original = original .astype ("category" )
10431040 with tm .ensure_clean () as path :
10441041 msg = (
10451042 "Stata value labels for a single variable must have "
@@ -1050,10 +1047,7 @@ def test_categorical_warnings_and_errors(self):
10501047
10511048 original = DataFrame .from_records (
10521049 [["a" ], ["b" ], ["c" ], ["d" ], [1 ]], columns = ["Too_long" ]
1053- )
1054- original = pd .concat (
1055- [original [col ].astype ("category" ) for col in original ], axis = 1
1056- )
1050+ ).astype ("category" )
10571051
10581052 with tm .assert_produces_warning (ValueLabelTypeMismatch ):
10591053 original .to_stata (path )
@@ -1074,7 +1068,7 @@ def test_categorical_with_stata_missing_values(self, version):
10741068
10751069 res = written_and_read_again .set_index ("index" )
10761070
1077- expected = original . copy ()
1071+ expected = original
10781072 for col in expected :
10791073 cat = expected [col ]._values
10801074 new_cats = cat .remove_unused_categories ().categories
@@ -1525,7 +1519,7 @@ def test_out_of_range_float(self):
15251519 reread = read_stata (path )
15261520
15271521 original ["ColumnTooBig" ] = original ["ColumnTooBig" ].astype (np .float64 )
1528- expected = original . copy ()
1522+ expected = original
15291523 expected .index = expected .index .astype (np .int32 )
15301524 tm .assert_frame_equal (reread .set_index ("index" ), expected )
15311525
@@ -1672,13 +1666,13 @@ def test_writer_117(self):
16721666 version = 117 ,
16731667 )
16741668 written_and_read_again = self .read_dta (path )
1675- # original.index is np.int32, read index is np.int64
1676- tm .assert_frame_equal (
1677- written_and_read_again .set_index ("index" ),
1678- original ,
1679- check_index_type = False ,
1680- )
1681- tm .assert_frame_equal (original , copy )
1669+ # original.index is np.int32, read index is np.int64
1670+ tm .assert_frame_equal (
1671+ written_and_read_again .set_index ("index" ),
1672+ original ,
1673+ check_index_type = False ,
1674+ )
1675+ tm .assert_frame_equal (original , copy )
16821676
16831677 def test_convert_strl_name_swap (self ):
16841678 original = DataFrame (
@@ -2052,7 +2046,7 @@ def test_compression(compression, version, use_dict, infer, compression_to_exten
20522046 fp = path
20532047 reread = read_stata (fp , index_col = "index" )
20542048
2055- expected = df . copy ()
2049+ expected = df
20562050 expected .index = expected .index .astype (np .int32 )
20572051 tm .assert_frame_equal (reread , expected )
20582052
@@ -2078,7 +2072,7 @@ def test_compression_dict(method, file_ext):
20782072 fp = path
20792073 reread = read_stata (fp , index_col = "index" )
20802074
2081- expected = df . copy ()
2075+ expected = df
20822076 expected .index = expected .index .astype (np .int32 )
20832077 tm .assert_frame_equal (reread , expected )
20842078
0 commit comments