Skip to content

Commit 1da1bc1

Browse files
JibolaNoahStapp
authored and committed
INTPYTHON-736 Convert simple $expr queries to $match queries
Co-authored-by: Noah Stapp <noah.stapp@mongodb.com>
1 parent dc134f6 commit 1da1bc1

File tree

11 files changed

+886
-96
lines changed

11 files changed

+886
-96
lines changed

django_mongodb_backend/query.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from django.db.models.sql.where import AND, OR, XOR, ExtraWhere, NothingNode, WhereNode
1212
from pymongo.errors import BulkWriteError, DuplicateKeyError, PyMongoError
1313

14+
from django_mongodb_backend.query_conversion.query_optimizer import QueryOptimizer
15+
1416

1517
def wrap_database_errors(func):
1618
@wraps(func)
@@ -55,6 +57,7 @@ def __init__(self, compiler):
5557
# $lookup stage that encapsulates the pipeline for performing a nested
5658
# subquery.
5759
self.subquery_lookup = None
60+
self.query_optimizer = QueryOptimizer()
5861

5962
def __repr__(self):
6063
return f"<MongoQuery: {self.match_mql!r} ORDER {self.ordering!r}>"
@@ -87,7 +90,7 @@ def get_pipeline(self):
8790
for query in self.subqueries or ():
8891
pipeline.extend(query.get_pipeline())
8992
if self.match_mql:
90-
pipeline.append({"$match": self.match_mql})
93+
pipeline.extend(self.query_optimizer.convert_expr_to_match(self.match_mql))
9194
if self.aggregation_pipeline:
9295
pipeline.extend(self.aggregation_pipeline)
9396
if self.project_fields:

django_mongodb_backend/query_conversion/__init__.py

Whitespace-only changes.
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
"""Expression To Match Converters"""
2+
3+
4+
class BaseConverter:
    """
    Common ancestor of the converters that rewrite a single MQL operator.
    """

    @classmethod
    def convert(cls, expr):
        """Convert an operator's operand; concrete converters override this."""
        raise NotImplementedError("Subclasses must implement this method.")

    @classmethod
    def is_simple_value(cls, value):
        """
        Return whether ``value`` is a plain literal rather than an expression.

        A string starting with "$" and any dict are not simple. A list, tuple
        or set is simple only when every one of its members is simple.
        Everything else (including ``None``) is simple.
        """
        if isinstance(value, str):
            return not value.startswith("$")
        if isinstance(value, (list, tuple, set)):
            return all(map(cls.is_simple_value, value))
        # TODO: Support `$getField` conversion.
        return not isinstance(value, dict)
24+
25+
26+
class BinaryConverter(BaseConverter):
    """
    Base class for converters of two-operand comparison operators.

    Subclasses set ``operator`` to the MQL operator they handle.
    """

    operator: str

    @classmethod
    def convert(cls, args):
        """
        Convert ``[field_expr, value]`` operands to a ``$match`` condition.

        Returns ``{field: value}`` when ``operator`` is ``"$eq"`` and
        ``{field: {operator: value}}`` otherwise. Returns ``None`` when the
        operands are not a two-item list, when the first operand is not a
        field path, or when the second operand is not a simple value.
        """
        if not isinstance(args, list) or len(args) != 2:
            return None
        field_expr, value = args
        # Check if first argument is a simple field reference.
        if not isinstance(field_expr, str) or not field_expr.startswith("$"):
            return None
        field_name = field_expr[1:]  # Remove the $ prefix.
        # "$$name" is an aggregation variable and a bare "$" names nothing;
        # neither is a document field, so neither can become a $match key.
        if not field_name or field_name.startswith("$"):
            return None
        if not cls.is_simple_value(value):
            return None
        if cls.operator == "$eq":
            return {field_name: value}
        return {field_name: {cls.operator: value}}
49+
50+
51+
class EqConverter(BinaryConverter):
    """Convert $eq operation to a $match compatible query.

    For example::
        {"$expr": {"$eq": ["$status", "active"]}}
    is converted to::
        {"status": "active"}
    """

    operator = "$eq"
63+
64+
65+
class GtConverter(BinaryConverter):
    """Convert $gt operation to a $match compatible query.

    For example::
        {"$expr": {"$gt": ["$price", 100]}}
    is converted to::
        {"price": {"$gt": 100}}
    """

    operator = "$gt"
77+
78+
79+
class GteConverter(BinaryConverter):
    """Convert $gte operation to a $match compatible query.

    For example::
        {"$expr": {"$gte": ["$price", 100]}}
    is converted to::
        {"price": {"$gte": 100}}
    """

    operator = "$gte"
91+
92+
93+
class LtConverter(BinaryConverter):
    """Convert $lt operation to a $match compatible query.

    For example::
        {"$expr": {"$lt": ["$price", 100]}}
    is converted to::
        {"price": {"$lt": 100}}
    """

    operator = "$lt"
105+
106+
107+
class LteConverter(BinaryConverter):
    """Convert $lte operation to a $match compatible query.

    For example::
        {"$expr": {"$lte": ["$price", 100]}}
    is converted to::
        {"price": {"$lte": 100}}
    """

    operator = "$lte"
119+
120+
121+
class InConverter(BaseConverter):
    """Convert $in operation to a $match compatible query.

    For example::
        {"$expr": {"$in": ["$category", ["electronics", "books"]]}}
    is converted to::
        {"category": {"$in": ["electronics", "books"]}}
    """

    @classmethod
    def convert(cls, in_args):
        """
        Convert ``[field_expr, values]`` operands to a ``$match`` condition.

        Returns ``{field: {"$in": values}}``, or ``None`` when the operands
        are not a two-item list, the first operand is not a field path, or
        ``values`` is not a list/tuple/set made up of simple values only.
        """
        if not isinstance(in_args, list) or len(in_args) != 2:
            return None
        field_expr, values = in_args
        # Check if first argument is a simple field reference
        if not isinstance(field_expr, str) or not field_expr.startswith("$"):
            return None
        field_name = field_expr[1:]  # Remove the $ prefix
        # "$$name" is an aggregation variable and a bare "$" names nothing;
        # neither is a document field, so neither can become a $match key.
        if not field_name or field_name.startswith("$"):
            return None
        if not isinstance(values, (list, tuple, set)):
            return None
        if not all(cls.is_simple_value(v) for v in values):
            return None
        return {field_name: {"$in": values}}
146+
147+
148+
class LogicalConverter(BaseConverter):
    """Generic for converting logical operations to a $match compatible query.

    Subclasses set ``_logical_op`` to the logical operator they handle.
    """

    _logical_op: str

    @classmethod
    def convert(cls, combined_conditions):
        """
        Convert the operand list of a logical operator.

        Returns ``{_logical_op: [converted, ...]}`` only when every operand
        can be converted. Returns ``None`` when the operand is not a list, is
        an empty list, or contains any operand that cannot be converted.
        """
        if not isinstance(combined_conditions, list):
            return None
        optimized_conditions = []
        for condition in combined_conditions:
            # convert_expression() yields None for anything it cannot rewrite,
            # including operands that are not single-key dicts. Skipping such
            # an operand would silently drop part of the filter, so any
            # failure stops the optimization instead.
            optimized_condition = convert_expression(condition)
            if not optimized_condition:
                return None
            optimized_conditions.append(optimized_condition)
        if optimized_conditions:
            return {cls._logical_op: optimized_conditions}
        return None
165+
166+
167+
class OrConverter(LogicalConverter):
    """Convert $or operation to a $match compatible query.

    For example::
        {
            "$expr": {
                "$or": [
                    {"$eq": ["$status", "active"]},
                    {"$in": ["$category", ["electronics", "books"]]},
                ]
            }
        }
    is converted to::
        {
            "$or": [
                {"status": "active"},
                {"category": {"$in": ["electronics", "books"]}},
            ]
        }
    """

    _logical_op = "$or"
185+
186+
187+
class AndConverter(LogicalConverter):
    """Convert $and operation to a $match compatible query.

    For example::
        {
            "$expr": {
                "$and": [
                    {"$eq": ["$status", "active"]},
                    {"$in": ["$category", ["electronics", "books"]]},
                    {"$eq": ["$verified", True]},
                ]
            }
        }
    is converted to::
        {
            "$and": [
                {"status": "active"},
                {"category": {"$in": ["electronics", "books"]}},
                {"verified": True},
            ]
        }
    """

    _logical_op = "$and"
207+
208+
209+
# Maps each MQL operator that can be rewritten to the converter class that
# handles it. convert_expression() dispatches through this table; an operator
# that is missing here is left unconverted.
OPTIMIZABLE_OPS = {
    "$eq": EqConverter,
    "$in": InConverter,
    "$and": AndConverter,
    "$or": OrConverter,
    "$gt": GtConverter,
    "$gte": GteConverter,
    "$lt": LtConverter,
    "$lte": LteConverter,
}
219+
220+
221+
def convert_expression(expr):
    """
    Rewrite a single-operator MQL expression as a $match condition.

    Args:
        expr: The MQL expression, expected to be a dict with exactly one
            operator key

    Returns:
        The converter's result for that operator, or None when ``expr`` is
        not a single-key dict or its operator has no registered converter
    """
    if not isinstance(expr, dict) or len(expr) != 1:
        return None
    ((op, operand),) = expr.items()
    converter = OPTIMIZABLE_OPS.get(op)
    if converter is None:
        return None
    return converter.convert(operand)
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from copy import deepcopy
2+
3+
from django_mongodb_backend.query_conversion.expression_converters import convert_expression
4+
5+
6+
class QueryOptimizer:
    """Rewrite ``$expr`` filters as plain ``$match`` stages where possible."""

    def convert_expr_to_match(self, expr):
        """
        Takes an MQL query with $expr and optimizes it by extracting
        optimizable conditions into separate $match stages.

        Args:
            expr: Dictionary containing the $expr query. It is deep-copied,
                so the caller's dictionary is never modified.

        Returns:
            List of ``$match`` pipeline stages
        """
        expr_query = deepcopy(expr)

        if "$expr" not in expr_query:
            # Nothing to convert, but the result is spliced into a pipeline,
            # so it still has to be a stage rather than a bare filter.
            return [{"$match": expr_query}]

        expr_content = expr_query.pop("$expr")
        if expr_content == {}:
            stages = [{"$match": {}}]
        else:
            stages = self._process_expression(expr_content)

        if expr_query:
            # Conditions that sat next to $expr must not be lost; consecutive
            # $match stages are applied one after another, so a separate
            # stage keeps them in force.
            stages.append({"$match": expr_query})
        return stages

    def _process_expression(self, expr):
        """
        Process an expression and extract optimizable conditions.

        Args:
            expr: The expression to process

        Returns:
            List of ``$match`` stages. Whatever cannot be converted is kept
            inside ``$expr`` within a ``$match`` stage.
        """
        if not isinstance(expr, dict):
            # Can't optimize
            return [{"$match": {"$expr": expr}}]

        has_and = "$and" in expr
        has_or = "$or" in expr
        if not has_and and not has_or:
            # Process single condition
            optimized = convert_expression(expr)
            if optimized:
                return [{"$match": optimized}]
            return [{"$match": {"$expr": expr}}]

        # Top-level $and/$or are handled operand by operand so that operands
        # which cannot be converted stay behind in their own $expr.
        match_conditions = []
        if has_and:
            match_conditions.extend(self._process_logical_conditions("$and", expr["$and"]))
        if has_or:
            match_conditions.extend(self._process_logical_conditions("$or", expr["$or"]))
        return match_conditions

    def _process_logical_conditions(self, logical_op, logical_conditions):
        """
        Process conditions within a logical array.

        Args:
            logical_op: The logical operator ("$and" or "$or") the conditions
                belong to
            logical_conditions: List of conditions within logical operator

        Returns:
            A one-element list holding the resulting ``$match`` stage
        """
        if not isinstance(logical_conditions, (list, tuple)) or not logical_conditions:
            # The query language rejects an empty $and/$or array, while the
            # aggregation form accepts it; keep such operands inside $expr.
            return [{"$match": {"$expr": {logical_op: logical_conditions}}}]

        optimized_conditions = []
        remaining_conditions = []
        for condition in logical_conditions:
            optimized = convert_expression(condition) if isinstance(condition, dict) else None
            if optimized:
                optimized_conditions.append(optimized)
            else:
                # Any expression we can't optimize must remain in an $expr.
                remaining_conditions.append({"$expr": condition})

        combined = optimized_conditions + remaining_conditions
        if optimized_conditions and len(combined) == 1:
            # A lone converted condition needs no logical wrapper.
            return [{"$match": combined[0]}]
        return [{"$match": {logical_op: combined}}]

tests/expression_converter_/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)