Skip to content

Commit 9665ae3

Browse files
Some doc failures
Signed-off-by: Goutam <goutam@anyscale.com>
1 parent e202479 commit 9665ae3

File tree

2 files changed: +12 additions, -31 deletions

python/ray/data/expressions.py

Lines changed: 8 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from typing import Any, Callable, Dict, Generic, List, Literal, TypeVar, Union
88

99
import pyarrow
10+
import pyarrow.compute as pc
1011

1112
from ray.data.block import BatchColumn
1213
from ray.data.datatype import DataType
@@ -123,18 +124,15 @@ class _PyArrowExpressionVisitor(_ExprVisitor["pyarrow.compute.Expression"]):
123124
"""Visitor that converts Ray Data expressions to PyArrow compute expressions."""
124125

125126
def visit_column(self, expr: "ColumnExpr") -> "pyarrow.compute.Expression":
126-
import pyarrow.compute as pc
127127

128128
return pc.field(expr.name)
129129

130130
def visit_literal(self, expr: "LiteralExpr") -> "pyarrow.compute.Expression":
131-
import pyarrow.compute as pc
132131

133132
return pc.scalar(expr.value)
134133

135134
def visit_binary(self, expr: "BinaryExpr") -> "pyarrow.compute.Expression":
136135
import pyarrow as pa
137-
import pyarrow.compute as pc
138136

139137
if expr.op in (Operation.IN, Operation.NOT_IN):
140138
left = self.visit(expr.left)
@@ -456,15 +454,15 @@ def struct(self) -> "_StructNamespace":
456454
>>> from ray.data.expressions import col
457455
>>> import ray
458456
>>> import pyarrow as pa
459-
>>> ds = ray.data.from_pyarrow(pa.table({
457+
>>> ds = ray.data.from_arrow(pa.table({
460458
... "user": pa.array([
461459
... {"name": "Alice", "age": 30}
462460
... ], type=pa.struct([
463461
... pa.field("name", pa.string()),
464462
... pa.field("age", pa.int32())
465463
... ]))
466464
... }))
467-
>>> ds = ds.with_column("age", col("user").struct["age"])
465+
>>> ds = ds.with_column("age", col("user").struct["age"]) # doctest: +SKIP
468466
"""
469467
return _StructNamespace(self)
470468

@@ -501,8 +499,6 @@ def _make_namespace_method(config: _PyArrowMethodConfig) -> Callable:
501499
if config.params is None:
502500
# Simple unary function
503501
def method(self) -> "UDFExpr":
504-
import pyarrow.compute as pc
505-
506502
func = getattr(pc, config.pc_func_name)
507503

508504
@udf(return_dtype=config.return_dtype)
@@ -514,8 +510,6 @@ def _wrapper(arr):
514510
else:
515511
# Function with parameters - capture them in closure
516512
def method(self, *args, **kwargs) -> "UDFExpr":
517-
import pyarrow.compute as pc
518-
519513
func = getattr(pc, config.pc_func_name)
520514

521515
@udf(return_dtype=config.return_dtype)
@@ -727,9 +721,9 @@ def __getitem__(self, key: Union[int, slice]) -> "UDFExpr":
727721
UDFExpr that extracts the element or slice.
728722
729723
Example:
730-
>>> col("items").list[0] # Get first item
731-
>>> col("items").list[1:3] # Get slice [1, 3)
732-
>>> col("items").list[-1] # Get last item
724+
>>> col("items").list[0] # Get first item # doctest: +SKIP
725+
>>> col("items").list[1:3] # Get slice [1, 3) # doctest: +SKIP
726+
>>> col("items").list[-1] # Get last item # doctest: +SKIP
733727
"""
734728
if isinstance(key, int):
735729
return self.get(key)
@@ -749,7 +743,6 @@ def get(self, index: int) -> "UDFExpr":
749743
Returns:
750744
UDFExpr that extracts the element at the given index.
751745
"""
752-
import pyarrow.compute as pc
753746

754747
@udf(return_dtype=DataType(object))
755748
def _list_get(arr):
@@ -768,7 +761,6 @@ def slice(self, start: int = None, stop: int = None, step: int = None) -> "UDFEx
768761
Returns:
769762
UDFExpr that extracts a slice from each list.
770763
"""
771-
import pyarrow.compute as pc
772764

773765
@udf(return_dtype=DataType(object))
774766
def _list_slice(arr):
@@ -806,7 +798,6 @@ def strip(self, characters: str = None) -> "UDFExpr":
806798
Returns:
807799
UDFExpr that strips characters from both ends.
808800
"""
809-
import pyarrow.compute as pc
810801

811802
@udf(return_dtype=DataType.string())
812803
def _str_strip(arr):
@@ -826,7 +817,6 @@ def lstrip(self, characters: str = None) -> "UDFExpr":
826817
Returns:
827818
UDFExpr that strips characters from the left.
828819
"""
829-
import pyarrow.compute as pc
830820

831821
@udf(return_dtype=DataType.string())
832822
def _str_lstrip(arr):
@@ -846,7 +836,6 @@ def rstrip(self, characters: str = None) -> "UDFExpr":
846836
Returns:
847837
UDFExpr that strips characters from the right.
848838
"""
849-
import pyarrow.compute as pc
850839

851840
@udf(return_dtype=DataType.string())
852841
def _str_rstrip(arr):
@@ -874,7 +863,6 @@ def pad(
874863
Returns:
875864
UDFExpr that pads strings.
876865
"""
877-
import pyarrow.compute as pc
878866

879867
@udf(return_dtype=DataType.string())
880868
def _str_pad(arr):
@@ -899,7 +887,6 @@ def center(self, width: int, fillchar: str = " ") -> "UDFExpr":
899887
Returns:
900888
UDFExpr that centers strings.
901889
"""
902-
import pyarrow.compute as pc
903890

904891
@udf(return_dtype=DataType.string())
905892
def _str_center(arr):
@@ -918,7 +905,6 @@ def slice(self, start: int, stop: int = None, step: int = 1) -> "UDFExpr":
918905
Returns:
919906
UDFExpr that slices each string.
920907
"""
921-
import pyarrow.compute as pc
922908

923909
@udf(return_dtype=DataType.string())
924910
def _str_slice(arr):
@@ -943,7 +929,6 @@ def replace(
943929
Returns:
944930
UDFExpr that replaces substrings.
945931
"""
946-
import pyarrow.compute as pc
947932

948933
@udf(return_dtype=DataType.string())
949934
def _str_replace(arr):
@@ -974,7 +959,6 @@ def replace_regex(
974959
Returns:
975960
UDFExpr that replaces matching substrings.
976961
"""
977-
import pyarrow.compute as pc
978962

979963
@udf(return_dtype=DataType.string())
980964
def _str_replace_regex(arr):
@@ -1003,7 +987,6 @@ def replace_slice(self, start: int, stop: int, replacement: str) -> "UDFExpr":
1003987
Returns:
1004988
UDFExpr that replaces the slice.
1005989
"""
1006-
import pyarrow.compute as pc
1007990

1008991
@udf(return_dtype=DataType.string())
1009992
def _str_replace_slice(arr):
@@ -1027,7 +1010,6 @@ def split(
10271010
Returns:
10281011
UDFExpr that returns lists of split strings.
10291012
"""
1030-
import pyarrow.compute as pc
10311013

10321014
@udf(return_dtype=DataType(object))
10331015
def _str_split(arr):
@@ -1053,7 +1035,6 @@ def split_regex(
10531035
Returns:
10541036
UDFExpr that returns lists of split strings.
10551037
"""
1056-
import pyarrow.compute as pc
10571038

10581039
@udf(return_dtype=DataType(object))
10591040
def _str_split_regex(arr):
@@ -1078,7 +1059,6 @@ def split_whitespace(
10781059
Returns:
10791060
UDFExpr that returns lists of split strings.
10801061
"""
1081-
import pyarrow.compute as pc
10821062

10831063
@udf(return_dtype=DataType(object))
10841064
def _str_split_whitespace(arr):
@@ -1101,7 +1081,6 @@ def extract(self, pattern: str) -> "UDFExpr":
11011081
Returns:
11021082
UDFExpr that returns the first matching substring.
11031083
"""
1104-
import pyarrow.compute as pc
11051084

11061085
@udf(return_dtype=DataType.string())
11071086
def _str_extract(arr):
@@ -1118,7 +1097,6 @@ def repeat(self, n: int) -> "UDFExpr":
11181097
Returns:
11191098
UDFExpr that repeats strings.
11201099
"""
1121-
import pyarrow.compute as pc
11221100

11231101
@udf(return_dtype=DataType.string())
11241102
def _str_repeat(arr):
@@ -1156,8 +1134,8 @@ def __getitem__(self, field_name: str) -> "UDFExpr":
11561134
UDFExpr that extracts the specified field from each struct.
11571135
11581136
Example:
1159-
>>> col("user").struct["age"] # Get age field
1160-
>>> col("user").struct["address"].struct["city"] # Get nested city field
1137+
>>> col("user").struct["age"] # Get age field # doctest: +SKIP
1138+
>>> col("user").struct["address"].struct["city"] # Get nested city field # doctest: +SKIP
11611139
"""
11621140
return self.field(field_name)
11631141

@@ -1170,7 +1148,6 @@ def field(self, field_name: str) -> "UDFExpr":
11701148
Returns:
11711149
UDFExpr that extracts the specified field from each struct.
11721150
"""
1173-
import pyarrow.compute as pc
11741151

11751152
@udf(return_dtype=DataType(object))
11761153
def _struct_field(arr):

python/ray/data/expressions.pyi

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,10 @@ class AliasExpr(Expr):
131131
class StarExpr(Expr):
132132
def structurally_equals(self, other: Any) -> bool: ...
133133

134+
# ──────────────────────────────────────
135+
# Namespace Classes with Full Method Signatures
136+
# ──────────────────────────────────────
137+
134138
class _ListNamespace:
135139
"""Namespace for list operations."""
136140
_expr: Expr

0 commit comments

Comments
 (0)