@@ -348,6 +348,15 @@ def astype(self, dtype, copy: bool = True):
348348 _str_len = ArrowStringArrayMixin ._str_len
349349 _str_slice = ArrowStringArrayMixin ._str_slice
350350
351+ @staticmethod
352+ def _is_re_pattern_with_flags (pat : str | re .Pattern ) -> bool :
353+ # check if `pat` is a compiled regex pattern with flags that are not
354+ # supported by pyarrow
355+ return (
356+ isinstance (pat , re .Pattern )
357+ and (pat .flags & ~ (re .IGNORECASE | re .UNICODE )) != 0
358+ )
359+
351360 @staticmethod
352361 def _preprocess_re_pattern (pat : re .Pattern , case : bool ):
353362 flags = pat .flags
@@ -369,12 +378,11 @@ def _str_contains(
369378 na = lib .no_default ,
370379 regex : bool = True ,
371380 ):
372- if flags :
381+ if flags or self . _is_re_pattern_with_flags ( pat ) :
373382 return super ()._str_contains (pat , case , flags , na , regex )
374383 if isinstance (pat , re .Pattern ):
384+ # TODO flags passed separately by user are ignored
375385 pat , case , flags = self ._preprocess_re_pattern (pat , case )
376- if flags :
377- return super ()._str_contains (pat , case , flags , na , regex )
378386
379387 return ArrowStringArrayMixin ._str_contains (self , pat , case , flags , na , regex )
380388
@@ -385,12 +393,10 @@ def _str_match(
385393 flags : int = 0 ,
386394 na : Scalar | lib .NoDefault = lib .no_default ,
387395 ):
388- if flags :
396+ if flags or self . _is_re_pattern_with_flags ( pat ) :
389397 return super ()._str_match (pat , case , flags , na )
390398 if isinstance (pat , re .Pattern ):
391399 pat , case , flags = self ._preprocess_re_pattern (pat , case )
392- if flags :
393- return super ()._str_match (pat , case , flags , na )
394400
395401 return ArrowStringArrayMixin ._str_match (self , pat , case , flags , na )
396402
@@ -401,12 +407,10 @@ def _str_fullmatch(
401407 flags : int = 0 ,
402408 na : Scalar | lib .NoDefault = lib .no_default ,
403409 ):
404- if flags :
410+ if flags or self . _is_re_pattern_with_flags ( pat ) :
405411 return super ()._str_fullmatch (pat , case , flags , na )
406412 if isinstance (pat , re .Pattern ):
407413 pat , case , flags = self ._preprocess_re_pattern (pat , case )
408- if flags :
409- return super ()._str_fullmatch (pat , case , flags , na )
410414
411415 return ArrowStringArrayMixin ._str_fullmatch (self , pat , case , flags , na )
412416
0 commit comments