77from typing import Any , Callable , Dict , Generic , List , Literal , TypeVar , Union
88
99import pyarrow
10+ import pyarrow .compute as pc
1011
1112from ray .data .block import BatchColumn
1213from ray .data .datatype import DataType
@@ -123,18 +124,15 @@ class _PyArrowExpressionVisitor(_ExprVisitor["pyarrow.compute.Expression"]):
123124 """Visitor that converts Ray Data expressions to PyArrow compute expressions."""
124125
def visit_column(self, expr: "ColumnExpr") -> "pyarrow.compute.Expression":
    """Translate a column expression into a PyArrow field reference.

    Args:
        expr: The column expression; only its ``name`` attribute is read.

    Returns:
        The value produced by ``pc.field`` for the column's name.
    """
    # ``pc`` is the ``pyarrow.compute`` alias imported at the top of the file.
    column_name = expr.name
    return pc.field(column_name)
129129
def visit_literal(self, expr: "LiteralExpr") -> "pyarrow.compute.Expression":
    """Translate a literal expression into a PyArrow scalar expression.

    Args:
        expr: The literal expression; only its ``value`` attribute is read.

    Returns:
        The value produced by ``pc.scalar`` for the literal's value.
    """
    # ``pc`` is the ``pyarrow.compute`` alias imported at the top of the file.
    literal_value = expr.value
    return pc.scalar(literal_value)
134133
135134 def visit_binary (self , expr : "BinaryExpr" ) -> "pyarrow.compute.Expression" :
136135 import pyarrow as pa
137- import pyarrow .compute as pc
138136
139137 if expr .op in (Operation .IN , Operation .NOT_IN ):
140138 left = self .visit (expr .left )
@@ -456,15 +454,15 @@ def struct(self) -> "_StructNamespace":
456454 >>> from ray.data.expressions import col
457455 >>> import ray
458456 >>> import pyarrow as pa
459- >>> ds = ray.data.from_pyarrow (pa.table({
457+ >>> ds = ray.data.from_arrow (pa.table({
460458 ... "user": pa.array([
461459 ... {"name": "Alice", "age": 30}
462460 ... ], type=pa.struct([
463461 ... pa.field("name", pa.string()),
464462 ... pa.field("age", pa.int32())
465463 ... ]))
466464 ... }))
467- >>> ds = ds.with_column("age", col("user").struct["age"])
465+ >>> ds = ds.with_column("age", col("user").struct["age"]) # doctest: +SKIP
468466 """
469467 return _StructNamespace (self )
470468
@@ -501,8 +499,6 @@ def _make_namespace_method(config: _PyArrowMethodConfig) -> Callable:
501499 if config .params is None :
502500 # Simple unary function
503501 def method (self ) -> "UDFExpr" :
504- import pyarrow .compute as pc
505-
506502 func = getattr (pc , config .pc_func_name )
507503
508504 @udf (return_dtype = config .return_dtype )
@@ -514,8 +510,6 @@ def _wrapper(arr):
514510 else :
515511 # Function with parameters - capture them in closure
516512 def method (self , * args , ** kwargs ) -> "UDFExpr" :
517- import pyarrow .compute as pc
518-
519513 func = getattr (pc , config .pc_func_name )
520514
521515 @udf (return_dtype = config .return_dtype )
@@ -727,9 +721,9 @@ def __getitem__(self, key: Union[int, slice]) -> "UDFExpr":
727721 UDFExpr that extracts the element or slice.
728722
729723 Example:
730- >>> col("items").list[0] # Get first item
731- >>> col("items").list[1:3] # Get slice [1, 3)
732- >>> col("items").list[-1] # Get last item
724+ >>> col("items").list[0] # Get first item # doctest: +SKIP
725+ >>> col("items").list[1:3] # Get slice [1, 3) # doctest: +SKIP
726+ >>> col("items").list[-1] # Get last item # doctest: +SKIP
733727 """
734728 if isinstance (key , int ):
735729 return self .get (key )
@@ -749,7 +743,6 @@ def get(self, index: int) -> "UDFExpr":
749743 Returns:
750744 UDFExpr that extracts the element at the given index.
751745 """
752- import pyarrow .compute as pc
753746
754747 @udf (return_dtype = DataType (object ))
755748 def _list_get (arr ):
@@ -768,7 +761,6 @@ def slice(self, start: int = None, stop: int = None, step: int = None) -> "UDFEx
768761 Returns:
769762 UDFExpr that extracts a slice from each list.
770763 """
771- import pyarrow .compute as pc
772764
773765 @udf (return_dtype = DataType (object ))
774766 def _list_slice (arr ):
@@ -806,7 +798,6 @@ def strip(self, characters: str = None) -> "UDFExpr":
806798 Returns:
807799 UDFExpr that strips characters from both ends.
808800 """
809- import pyarrow .compute as pc
810801
811802 @udf (return_dtype = DataType .string ())
812803 def _str_strip (arr ):
@@ -826,7 +817,6 @@ def lstrip(self, characters: str = None) -> "UDFExpr":
826817 Returns:
827818 UDFExpr that strips characters from the left.
828819 """
829- import pyarrow .compute as pc
830820
831821 @udf (return_dtype = DataType .string ())
832822 def _str_lstrip (arr ):
@@ -846,7 +836,6 @@ def rstrip(self, characters: str = None) -> "UDFExpr":
846836 Returns:
847837 UDFExpr that strips characters from the right.
848838 """
849- import pyarrow .compute as pc
850839
851840 @udf (return_dtype = DataType .string ())
852841 def _str_rstrip (arr ):
@@ -874,7 +863,6 @@ def pad(
874863 Returns:
875864 UDFExpr that pads strings.
876865 """
877- import pyarrow .compute as pc
878866
879867 @udf (return_dtype = DataType .string ())
880868 def _str_pad (arr ):
@@ -899,7 +887,6 @@ def center(self, width: int, fillchar: str = " ") -> "UDFExpr":
899887 Returns:
900888 UDFExpr that centers strings.
901889 """
902- import pyarrow .compute as pc
903890
904891 @udf (return_dtype = DataType .string ())
905892 def _str_center (arr ):
@@ -918,7 +905,6 @@ def slice(self, start: int, stop: int = None, step: int = 1) -> "UDFExpr":
918905 Returns:
919906 UDFExpr that slices each string.
920907 """
921- import pyarrow .compute as pc
922908
923909 @udf (return_dtype = DataType .string ())
924910 def _str_slice (arr ):
@@ -943,7 +929,6 @@ def replace(
943929 Returns:
944930 UDFExpr that replaces substrings.
945931 """
946- import pyarrow .compute as pc
947932
948933 @udf (return_dtype = DataType .string ())
949934 def _str_replace (arr ):
@@ -974,7 +959,6 @@ def replace_regex(
974959 Returns:
975960 UDFExpr that replaces matching substrings.
976961 """
977- import pyarrow .compute as pc
978962
979963 @udf (return_dtype = DataType .string ())
980964 def _str_replace_regex (arr ):
@@ -1003,7 +987,6 @@ def replace_slice(self, start: int, stop: int, replacement: str) -> "UDFExpr":
1003987 Returns:
1004988 UDFExpr that replaces the slice.
1005989 """
1006- import pyarrow .compute as pc
1007990
1008991 @udf (return_dtype = DataType .string ())
1009992 def _str_replace_slice (arr ):
@@ -1027,7 +1010,6 @@ def split(
10271010 Returns:
10281011 UDFExpr that returns lists of split strings.
10291012 """
1030- import pyarrow .compute as pc
10311013
10321014 @udf (return_dtype = DataType (object ))
10331015 def _str_split (arr ):
@@ -1053,7 +1035,6 @@ def split_regex(
10531035 Returns:
10541036 UDFExpr that returns lists of split strings.
10551037 """
1056- import pyarrow .compute as pc
10571038
10581039 @udf (return_dtype = DataType (object ))
10591040 def _str_split_regex (arr ):
@@ -1078,7 +1059,6 @@ def split_whitespace(
10781059 Returns:
10791060 UDFExpr that returns lists of split strings.
10801061 """
1081- import pyarrow .compute as pc
10821062
10831063 @udf (return_dtype = DataType (object ))
10841064 def _str_split_whitespace (arr ):
@@ -1101,7 +1081,6 @@ def extract(self, pattern: str) -> "UDFExpr":
11011081 Returns:
11021082 UDFExpr that returns the first matching substring.
11031083 """
1104- import pyarrow .compute as pc
11051084
11061085 @udf (return_dtype = DataType .string ())
11071086 def _str_extract (arr ):
@@ -1118,7 +1097,6 @@ def repeat(self, n: int) -> "UDFExpr":
11181097 Returns:
11191098 UDFExpr that repeats strings.
11201099 """
1121- import pyarrow .compute as pc
11221100
11231101 @udf (return_dtype = DataType .string ())
11241102 def _str_repeat (arr ):
@@ -1156,8 +1134,8 @@ def __getitem__(self, field_name: str) -> "UDFExpr":
11561134 UDFExpr that extracts the specified field from each struct.
11571135
11581136 Example:
1159- >>> col("user").struct["age"] # Get age field
1160- >>> col("user").struct["address"].struct["city"] # Get nested city field
1137+ >>> col("user").struct["age"] # Get age field # doctest: +SKIP
1138+ >>> col("user").struct["address"].struct["city"] # Get nested city field # doctest: +SKIP
11611139 """
11621140 return self .field (field_name )
11631141
@@ -1170,7 +1148,6 @@ def field(self, field_name: str) -> "UDFExpr":
11701148 Returns:
11711149 UDFExpr that extracts the specified field from each struct.
11721150 """
1173- import pyarrow .compute as pc
11741151
11751152 @udf (return_dtype = DataType (object ))
11761153 def _struct_field (arr ):
0 commit comments