@@ -1174,6 +1174,180 @@ def _convert_index_labels(self, index, levels_to_convert: list[int]):
11741174 tuples .append (tuple (mutable ))
11751175 return type (index ).from_tuples (tuples , names = index .names )
11761176
1177+ @staticmethod
1178+ def _parser_engine (parser ):
1179+ return getattr (parser , "_engine" , parser )
1180+
1181+ @classmethod
1182+ def _parser_attr (cls , parser , attribute : str ):
1183+ if hasattr (parser , attribute ):
1184+ return getattr (parser , attribute )
1185+ engine = cls ._parser_engine (parser )
1186+ if engine is not parser and hasattr (engine , attribute ):
1187+ return getattr (engine , attribute )
1188+ return None
1189+
1190+ def _resolve_text_positions (
1191+ self , parser , text_formatted_cols : set [int ]
1192+ ) -> set [int ]:
1193+ if not text_formatted_cols :
1194+ return set ()
1195+
1196+ orig_names = self ._parser_attr (parser , "orig_names" ) or []
1197+ col_indices = self ._parser_attr (parser , "_col_indices" )
1198+ if col_indices is None :
1199+ max_pos = len (orig_names )
1200+ return {idx for idx in text_formatted_cols if idx < max_pos }
1201+
1202+ positions : set [int ] = set ()
1203+ for idx in text_formatted_cols :
1204+ pos = bisect_left (col_indices , idx )
1205+ if pos < len (col_indices ) and col_indices [pos ] == idx :
1206+ positions .add (pos )
1207+ return positions
1208+
1209+ def _inject_text_converters (self , parser , text_positions : set [int ]) -> None :
1210+ if not text_positions :
1211+ return
1212+
1213+ target = self ._parser_engine (parser )
1214+ existing_converters = getattr (target , "converters" , None )
1215+ if existing_converters is None :
1216+ target .converters = {}
1217+ existing_clean : dict = {}
1218+ else :
1219+ target .converters = dict (existing_converters )
1220+ existing_clean = target ._clean_mapping (existing_converters )
1221+
1222+ orig_names = self ._parser_attr (parser , "orig_names" ) or []
1223+
1224+ for pos in text_positions :
1225+ if pos >= len (orig_names ):
1226+ continue
1227+ label = orig_names [pos ]
1228+ if existing_clean and label in existing_clean :
1229+ continue
1230+ target .converters [pos ] = self ._text_format_converter
1231+
1232+ def _finalize_text_columns (
1233+ self ,
1234+ frame : DataFrame ,
1235+ parser ,
1236+ text_positions : set [int ],
1237+ ) -> None :
1238+ if not text_positions or frame .empty :
1239+ return
1240+
1241+ orig_names = self ._parser_attr (parser , "orig_names" ) or []
1242+ total_positions = len (orig_names )
1243+ if total_positions == 0 :
1244+ return
1245+
1246+ index_positions = self ._resolve_index_positions (parser , total_positions )
1247+
1248+ data_position_map : dict [int , int ] = {}
1249+ df_col_index = 0
1250+ for pos in range (total_positions ):
1251+ if pos in index_positions :
1252+ continue
1253+ if df_col_index >= frame .shape [1 ]:
1254+ break
1255+ data_position_map [pos ] = df_col_index
1256+ df_col_index += 1
1257+
1258+ for pos in text_positions :
1259+ if pos in index_positions :
1260+ continue
1261+ df_pos = data_position_map .get (pos )
1262+ if df_pos is None :
1263+ continue
1264+ frame .iloc [:, df_pos ] = frame .iloc [:, df_pos ].map (
1265+ self ._text_format_converter
1266+ )
1267+
1268+ index_levels = self ._index_levels_for_positions (
1269+ index_positions , text_positions , total_positions
1270+ )
1271+ if index_levels :
1272+ frame .index = self ._convert_index_labels (frame .index , index_levels )
1273+
1274+ def _coerce_text_data (self , data : list , text_formatted_cols : set [int ]) -> None :
1275+ if not text_formatted_cols or not data :
1276+ return
1277+
1278+ for row in data :
1279+ if not row :
1280+ continue
1281+ for col_idx in text_formatted_cols :
1282+ if col_idx >= len (row ):
1283+ continue
1284+ row [col_idx ] = self ._text_format_converter (row [col_idx ])
1285+
1286+ def _resolve_index_positions (self , parser , total_positions : int ) -> set [int ]:
1287+ index_col = self ._parser_attr (parser , "index_col" )
1288+ if index_col is None or index_col is False :
1289+ return set ()
1290+
1291+ if is_list_like (index_col ) and not isinstance (index_col , (str , bytes )):
1292+ entries = list (index_col )
1293+ else :
1294+ entries = [index_col ]
1295+
1296+ orig_names = self ._parser_attr (parser , "orig_names" )
1297+ positions : set [int ] = set ()
1298+ for entry in entries :
1299+ if isinstance (entry , int ):
1300+ if 0 <= entry < total_positions :
1301+ positions .add (entry )
1302+ elif orig_names is not None :
1303+ try :
1304+ pos = orig_names .index (entry )
1305+ except ValueError :
1306+ continue
1307+ positions .add (pos )
1308+ return positions
1309+
1310+ def _index_levels_for_positions (
1311+ self ,
1312+ index_positions : set [int ],
1313+ text_positions : set [int ],
1314+ total_positions : int ,
1315+ ) -> list [int ]:
1316+ if not index_positions :
1317+ return []
1318+
1319+ position_to_level : dict [int , int ] = {}
1320+ level = 0
1321+ for pos in range (total_positions ):
1322+ if pos in index_positions :
1323+ position_to_level [pos ] = level
1324+ level += 1
1325+
1326+ ordered_levels : list [int ] = []
1327+ for pos in sorted (text_positions ):
1328+ level_idx = position_to_level .get (pos )
1329+ if level_idx is not None and level_idx not in ordered_levels :
1330+ ordered_levels .append (level_idx )
1331+ return ordered_levels
1332+
1333+ def _convert_index_labels (self , index , levels_to_convert : list [int ]):
1334+ if not levels_to_convert :
1335+ return index
1336+
1337+ converter = self ._text_format_converter
1338+ if getattr (index , "nlevels" , 1 ) == 1 :
1339+ return index .map (converter )
1340+
1341+ levels_set = set (levels_to_convert )
1342+ tuples = []
1343+ for value in index .tolist ():
1344+ mutable = list (value )
1345+ for level in levels_set :
1346+ if level < len (mutable ):
1347+ mutable [level ] = converter (mutable [level ])
1348+ tuples .append (tuple (mutable ))
1349+ return type (index ).from_tuples (tuples , names = index .names )
1350+
11771351
11781352@set_module ("pandas" )
11791353@doc (storage_options = _shared_docs ["storage_options" ])
0 commit comments