66from pandas import read_csv , to_datetime
77
88from pandas_datareader .base import _BaseReader
9- from pandas_datareader .compat import StringIO
9+ from pandas_datareader .compat import PYTHON_LT_3_10 , StringIO
1010
1111_URL = "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/"
1212_URL_PREFIX = "ftp/"
@@ -57,7 +57,7 @@ def _read_zipfile(self, url):
5757 with tempfile .TemporaryFile () as tmpf :
5858 tmpf .write (raw )
5959 with ZipFile (tmpf , "r" ) as zf :
60- try :
60+ try :
6161 data = zf .open (zf .namelist ()[0 ]).read ().decode ("utf-8" , "ignore" )
6262 except UnicodeDecodeError :
6363 data = zf .open (zf .namelist ()[0 ]).read ().decode (encoding = "cp1252" )
@@ -78,8 +78,6 @@ def read(self):
7878 def _read_one_data (self , url , params ):
7979 params = {
8080 "index_col" : 0 ,
81- "parse_dates" : [0 ],
82- "date_parser" : _parse_date_famafrench ,
8381 }
8482
8583 # headers in these files are not valid
@@ -89,7 +87,12 @@ def _read_one_data(self, url, params):
8987 else :
9088 c = ["Count" ]
9189 r = list (range (0 , 105 , 5 ))
92- params ["names" ] = ["Date" ] + c + list (zip (r , r [1 :], strict = False ))
90+
91+ if PYTHON_LT_3_10 :
92+ additional_params = list (zip (r , r [1 :])) # noqa: B905
93+ else :
94+ additional_params = list (zip (r , r [1 :], strict = False ))
95+ params ["names" ] = ["Date" ] + c + additional_params
9396
9497 if self .symbols != "Prior_2-12_Breakpoints" :
9598 params ["skiprows" ] = 1
@@ -111,12 +114,14 @@ def _read_one_data(self, url, params):
111114 start = 0 if not match else match .start ()
112115
113116 df = read_csv (StringIO ("Date" + src [start :]), ** params )
114- try :
115- idx_name = df .index .name # hack for pandas 0.16.2
116- df = df .to_period (df .index .inferred_freq [:1 ])
117- df .index .name = idx_name
118- except Exception :
119- pass
117+ if df .index .min () > 190000 :
118+ df .index = to_datetime (df .index .astype (str ), format = "%Y%m" ).to_period (
119+ freq = "M"
120+ )
121+ else :
122+ df .index = to_datetime (df .index .astype (str ), format = "%Y" ).to_period (
123+ freq = "Y"
124+ )
120125 df = df .truncate (self .start , self .end )
121126 datasets [i ] = df
122127
0 commit comments