Skip to content

Commit 86771d5

Browse files
WaVEV authored and timgraham committed
INTPYTHON-793 Improve join performance by pushing complex filter conditions to $lookup
1 parent 2db59af commit 86771d5

File tree

4 files changed

+577
-19
lines changed

4 files changed

+577
-19
lines changed

django_mongodb_backend/compiler.py

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@
99
from django.db.models.expressions import Case, Col, OrderBy, Ref, Value, When
1010
from django.db.models.functions.comparison import Coalesce
1111
from django.db.models.functions.math import Power
12-
from django.db.models.lookups import IsNull, Lookup
12+
from django.db.models.lookups import IsNull
1313
from django.db.models.sql import compiler
1414
from django.db.models.sql.constants import GET_ITERATOR_CHUNK_SIZE, MULTI, SINGLE
1515
from django.db.models.sql.datastructures import BaseTable
16-
from django.db.models.sql.where import AND, WhereNode
16+
from django.db.models.sql.where import AND, OR, XOR, NothingNode, WhereNode
1717
from django.utils.functional import cached_property
1818
from pymongo import ASCENDING, DESCENDING
1919

2020
from .expressions.search import SearchExpression, SearchVector
2121
from .query import MongoQuery, wrap_database_errors
22-
from .query_utils import is_direct_value
22+
from .query_utils import is_constant_value, is_direct_value
2323

2424

2525
class SQLCompiler(compiler.SQLCompiler):
@@ -661,27 +661,72 @@ def get_combinator_queries(self):
661661
combinator_pipeline.append({"$unset": "_id"})
662662
return combinator_pipeline
663663

664+
def _get_pushable_conditions(self):
    """
    Return a dict mapping each alias to a WhereNode holding its pushable
    condition.

    The tree returned by ``self.get_where()`` is walked recursively.
    Negation is carried down to the leaves, so every combined node built
    here is non-negated and only the single-child wrappers around leaves
    may be negated. An alias with nothing pushable is absent from the
    result.
    """

    def collect_pushable(expr, negated=False):
        """
        Return ``{alias: WhereNode}`` for the pushable part of ``expr``.

        ``negated`` is True when an odd number of enclosing WhereNodes are
        negated.
        """
        if expr is None or isinstance(expr, NothingNode):
            return {}

        if not isinstance(expr, WhereNode):
            # A leaf is pushable only when comparing a field to a constant
            # or simple value. Children that don't expose ``lhs``/``rhs``
            # are treated as not pushable rather than raising
            # AttributeError.
            lhs = getattr(expr, "lhs", None)
            if not isinstance(lhs, Col) or not hasattr(expr, "rhs"):
                return {}
            rhs = expr.rhs
            if not (is_constant_value(rhs) or getattr(rhs, "is_simple_column", False)):
                return {}
            return {lhs.alias: WhereNode(children=[expr], negated=negated)}

        # Apply De Morgan: track negation so connectors are flipped when
        # needed.
        negated ^= expr.negated
        operator = expr.connector
        if operator == XOR:
            # XOR branches can't be split per alias; bail out before
            # walking the children, whose results would be discarded.
            return {}
        if negated:
            operator = OR if operator == AND else AND

        pushable_expressions = [
            collect_pushable(sub_expr, negated=negated)
            for sub_expr in expr.children
            if sub_expr is not None
        ]
        alias_children = defaultdict(list)
        for pushable in pushable_expressions:
            for alias, condition in pushable.items():
                alias_children[alias].append(condition)

        if operator != AND:
            # Only aliases shared across all branches are pushable for OR:
            # a branch that doesn't constrain an alias would let any of
            # its rows through.
            shared_alias = (
                set.intersection(*(set(pushable) for pushable in pushable_expressions))
                if pushable_expressions
                else set()
            )
            alias_children = {
                alias: children
                for alias, children in alias_children.items()
                if alias in shared_alias
            }

        # Build per-alias pushable condition nodes. Negation already lives
        # on the leaf wrappers, so the combined node is never negated.
        return {
            alias: WhereNode(children=children, negated=False, connector=operator)
            for alias, children in alias_children.items()
        }

    return collect_pushable(self.get_where())
719+
664720
def get_lookup_pipeline(self):
# Return the list built by concatenating the as_mql() output of every
# joined alias in self.query.alias_map.
665721
result = []
666722
# To improve join performance, push conditions (filters) from the
667723
# WHERE ($match) clause to the JOIN ($lookup) clause.
668-
where = self.get_where()
669-
pushed_filters = defaultdict(list)
670-
for expr in where.children if where and where.connector == AND else ():
671-
# Push only basic lookups; no subqueries or complex conditions.
672-
# To avoid duplication across subqueries, only use the LHS target
673-
# table.
674-
if (
675-
isinstance(expr, Lookup)
676-
and isinstance(expr.lhs, Col)
677-
and (is_direct_value(expr.rhs) or isinstance(expr.rhs, (Value, Col)))
678-
):
679-
pushed_filters[expr.lhs.alias].append(expr)
724+
pushed_filters = self._get_pushable_conditions()
680725
# Iterate over a tuple snapshot of the alias names rather than the
# alias_map itself.
for alias in tuple(self.query.alias_map):
681726
# Skip aliases whose refcount is zero and the alias equal to this
# compiler's own collection name.
if not self.query.alias_refcount[alias] or self.collection_name == alias:
682727
continue
683728
result += self.query.alias_map[alias].as_mql(
684-
self, self.connection, WhereNode(pushed_filters[alias], connector=AND)
729+
# .get() passes None when the alias has no pushable condition.
self, self.connection, pushed_filters.get(alias)
685730
)
686731
return result
687732

django_mongodb_backend/query_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from django.core.exceptions import FullResultSet
2+
from django.db.models import F
23
from django.db.models.aggregates import Aggregate
34
from django.db.models.expressions import CombinedExpression, Func, Value
45
from django.db.models.sql.query import Query
@@ -67,7 +68,7 @@ def is_constant_value(value):
6768
else:
6869
constants_sub_expressions = True
6970
constants_sub_expressions = constants_sub_expressions and not (
70-
isinstance(value, Query)
71+
isinstance(value, Query | F)
7172
or value.contains_aggregate
7273
or value.contains_over_clause
7374
or value.contains_column_references

docs/releases/5.2.x.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ Bug fixes
1717

1818
- ...
1919

20+
Performance improvements
21+
------------------------
22+
23+
- Improved ``QuerySet`` join (``$lookup``) performance by pushing complex
24+
conditions from the ``WHERE`` (``$match``) clause to the ``$lookup`` stage.
25+
2026
5.2.3
2127
=====
2228

0 commit comments

Comments
 (0)