@@ -340,7 +340,7 @@ cdef class TextReader:
340340 cdef:
341341 parser_t * parser
342342 object na_fvalues
343- object true_values, false_values
343+ list true_values, false_values
344344 object handle
345345 object orig_header
346346 bint na_filter, keep_default_na, has_usecols, has_mi_columns
@@ -942,6 +942,7 @@ cdef class TextReader:
942942 bint na_filter = 0
943943 int64_t num_cols
944944 dict results
945+ bint is_default_dict_dtype
945946
946947 start = self .parser_start
947948
@@ -957,26 +958,7 @@ cdef class TextReader:
957958 self .parser.line_fields[i] + \
958959 (num_cols >= self .parser.line_fields[i]) * num_cols
959960
960- usecols_not_callable_and_exists = not callable (self .usecols) and self .usecols
961- names_larger_num_cols = (self .names and
962- len (self .names) - self .leading_cols > num_cols)
963-
964- if self .table_width - self .leading_cols > num_cols:
965- if (usecols_not_callable_and_exists
966- and self .table_width - self .leading_cols < len (self .usecols)
967- or names_larger_num_cols):
968- raise ParserError(f" Too many columns specified: expected "
969- f" {self.table_width - self.leading_cols} "
970- f" and found {num_cols}" )
971-
972- if (usecols_not_callable_and_exists and
973- all (isinstance (u, int ) for u in self .usecols)):
974- missing_usecols = [col for col in self .usecols if col >= num_cols]
975- if missing_usecols:
976- raise ParserError(
977- " Defining usecols with out-of-bounds indices is not allowed. "
978- f" {missing_usecols} are out of bounds." ,
979- )
961+ self ._validate_usecols_and_names(num_cols)
980962
981963 results = {}
982964 nused = 0
@@ -1004,22 +986,7 @@ cdef class TextReader:
1004986 nused += 1
1005987
1006988 conv = self ._get_converter(i, name)
1007-
1008- col_dtype = None
1009- if self .dtype is not None :
1010- if isinstance (self .dtype, dict ):
1011- if name in self .dtype:
1012- col_dtype = self .dtype[name]
1013- elif i in self .dtype:
1014- col_dtype = self .dtype[i]
1015- elif is_default_dict_dtype:
1016- col_dtype = self .dtype[name]
1017- else :
1018- if self .dtype.names:
1019- # structured array
1020- col_dtype = np.dtype(self .dtype.descr[i][1 ])
1021- else :
1022- col_dtype = self .dtype
989+ col_dtype = self ._get_col_dtype(i, is_default_dict_dtype, name)
1023990
1024991 if conv:
1025992 if col_dtype is not None :
@@ -1267,6 +1234,47 @@ cdef class TextReader:
12671234 return _string_box_utf8(self .parser, i, start, end, na_filter,
12681235 na_hashset, self .encoding_errors)
12691236
cdef void _validate_usecols_and_names(self, int64_t num_cols) except *:
    """
    Check ``self.usecols`` and ``self.names`` against the parsed column count.

    Parameters
    ----------
    num_cols : int64_t
        Column count to validate against. Declared ``int64_t`` so a caller
        holding an ``int64_t`` count does not have it narrowed to C ``int``.

    Raises
    ------
    ParserError
        * If ``self.table_width - self.leading_cols`` exceeds ``num_cols``
          and either a non-callable, non-empty ``usecols`` has more entries
          than that width, or ``len(self.names) - self.leading_cols``
          exceeds ``num_cols``.
        * If ``usecols`` is non-callable, non-empty, consists only of ints,
          and contains an entry ``>= num_cols``.

    Notes
    -----
    The explicit ``except *`` makes the raised ``ParserError`` propagate to
    the caller regardless of the Cython version's default exception
    handling for ``void`` cdef functions.
    """
    # Truthy only when usecols is a non-empty, non-callable collection.
    usecols_not_callable_and_exists = not callable(self.usecols) and self.usecols
    names_larger_num_cols = (self.names and
                             len(self.names) - self.leading_cols > num_cols)

    if self.table_width - self.leading_cols > num_cols:
        # Parenthesised to make the original ``and``-before-``or``
        # precedence explicit; the grouping is unchanged.
        if ((usecols_not_callable_and_exists
                and self.table_width - self.leading_cols < len(self.usecols))
                or names_larger_num_cols):
            raise ParserError(f" Too many columns specified: expected "
                              f" {self.table_width - self.leading_cols} "
                              f" and found {num_cols}")

    # Positional (all-int) usecols must refer to columns that exist.
    if (usecols_not_callable_and_exists and
            all(isinstance(u, int) for u in self.usecols)):
        missing_usecols = [col for col in self.usecols if col >= num_cols]
        if missing_usecols:
            raise ParserError(
                " Defining usecols with out-of-bounds indices is not allowed. "
                f" {missing_usecols} are out of bounds.",
            )
1258+
# -> DtypeObj
cdef object _get_col_dtype(self, int64_t i, bint is_default_dict_dtype, name):
    """
    Resolve the dtype requested for one column from ``self.dtype``.

    Parameters
    ----------
    i : int64_t
        Positional index of the column.
    is_default_dict_dtype : bint
        When true and ``self.dtype`` is a dict containing neither ``name``
        nor ``i``, the dict is still indexed with ``name``
        (presumably a defaultdict supplying a fallback -- confirm at caller).
    name : object
        Column label, tried before the positional index for dict lookups.

    Returns
    -------
    object
        ``None`` when ``self.dtype`` is ``None`` or a dict with no
        applicable entry; otherwise the dict entry, the ``i``-th field dtype
        when ``self.dtype.names`` is truthy, or ``self.dtype`` itself.
    """
    requested = self.dtype

    if requested is None:
        return None

    if isinstance(requested, dict):
        # Label lookup takes precedence over positional lookup.
        if name in requested:
            return requested[name]
        if i in requested:
            return requested[i]
        if is_default_dict_dtype:
            return requested[name]
        return None

    if requested.names:
        # structured array: pick the dtype of the i-th field
        return np.dtype(requested.descr[i][1])

    return requested
1277+
12701278 def _get_converter (self , i: int , name ):
12711279 if self .converters is None :
12721280 return None
@@ -1347,8 +1355,8 @@ cdef _close(TextReader reader):
13471355
13481356
13491357cdef:
1350- object _true_values = [b" True" , b" TRUE" , b" true" ]
1351- object _false_values = [b" False" , b" FALSE" , b" false" ]
1358+ list _true_values = [b" True" , b" TRUE" , b" true" ]
1359+ list _false_values = [b" False" , b" FALSE" , b" false" ]
13521360
13531361
13541362def _ensure_encoded (list lst ):
0 commit comments