dolthub · angelamayxie · Nov 3, 2025 · Nov 3, 2025 · Nov 4, 2025 · Nov 4, 2025
diff --git a/enginetest/queries/imdb_plans.go b/enginetest/queries/imdb_plans.go
diff --git a/enginetest/queries/integration_plans.go b/enginetest/queries/integration_plans.go
diff --git a/enginetest/queries/query_plans.go b/enginetest/queries/query_plans.go
diff --git a/go.sum b/go.sum
@@ -18,8 +18,6 @@ github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTE
 github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71/go.mod h1:2/2zjLQ/JOOSbbSboojeg+cAwcRV0fDLzIiWch/lhqI=
 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
-github.com/dolthub/vitess v0.0.0-20250930230441-70c2c6a98e33 h1:ScHTwNbcVC6JH1OSyXzj8S4w67BIpRXwTSjrac3/PSw=
-github.com/dolthub/vitess v0.0.0-20250930230441-70c2c6a98e33/go.mod h1:8pvvk5OLaLN9LLxghyczUapn/97l+mBgIb10qC1LG84=
 github.com/dolthub/vitess v0.0.0-20251031205214-d09b65bd77b0 h1:RXopPQP1bwb5fsnXAC89joqk/3pIgQnQSU8lAHJhue0=
 github.com/dolthub/vitess v0.0.0-20251031205214-d09b65bd77b0/go.mod h1:FLWqdXsAeeBQyFwDjmBVu0GnbjI2MKeRf3tRVdJEKlI=
 github.com/dolthub/vitess v0.0.0-20251105091622-b08b393fd9b1 h1:2uiHo4gkf2n/Cw9uCBDkCWj35Vz48Uhif2B9P+DqgCg=

diff --git a/sql/func_deps.go b/sql/func_deps.go
@@ -43,13 +43,17 @@ func (e *EquivSets) Sets() []ColSet {
 }
 
 func (e *EquivSets) String() string {
+	return e.StringWithLabel("equiv")
+}
+
+func (e *EquivSets) StringWithLabel(label string) string {
 	if e == nil {
-		return "equiv()"
+		return fmt.Sprintf("%s()", label)
 	}
 	b := strings.Builder{}
 	sep := ""
 	for i, set := range e.sets {
-		b.WriteString(fmt.Sprintf("%sequiv%s", sep, set))
+		b.WriteString(fmt.Sprintf("%s%s%s", sep, label, set))
 		if i == 0 {
 			sep = "; "
 		}
@@ -102,18 +106,6 @@ func (k *Key) implies(other Key) bool {
 // a fraction of the total input set. The first key always determines
 // the entire relation, which seems good enough for many cases.
 // Maintaining partials sets also requires much less bookkeeping.
-//
-// TODO: We used to not track dependency sets and only add keys that
-// determined the entire relation. One observed downside of that approach
-// is that left joins fail to convert equivalencies on the null-extended
-// side to lax functional dependencies. For example, in the query below,
-// the left join loses (a) == (m) because (m) can now be NULL:
-//
-// SELECT * from adbcd LEFT_JOIN mnpq WHERE a = m
-//
-// But we could maintain (m)~~>(n), which higher-level null enforcement
-// (ex: GROUPING) can reclaim as equivalence. Although we now track partial
-// dependency sets, this may still not be supported.
 type FuncDepSet struct {
 	// all columns in this relation
 	all ColSet
@@ -123,6 +115,8 @@ type FuncDepSet struct {
 	consts ColSet
 	// tracks in-scope equivalent closure
 	equivs *EquivSets
+	// tracks partial equivalence closure: equivalences added from left join filters, kept apart from equivs because the right side is null-extended
+	partialEquivs *EquivSets
 	// keys includes the set of primary and secondary keys
 	// accumulated in the relation. The first key is the best
 	// key we have seen so far, where strict > lax and shorter
@@ -213,22 +207,25 @@ func (f *FuncDepSet) String() string {
 		b.WriteString(fmt.Sprintf("%s%s", sep, f.equivs))
 		sep = "; "
 	}
-	if len(f.keys) < 2 {
-		return b.String()
+	if f.partialEquivs.Len() > 0 {
+		b.WriteString(fmt.Sprintf("%s%s", sep, f.partialEquivs.StringWithLabel("partialEquiv")))
+		sep = "; "
 	}
-	for _, k := range f.keys[1:] {
-		var cols string
-		if k.allCols == f.all {
-			cols = k.cols.String()
-		} else {
-			cols = fmt.Sprintf("%s/%s", k.cols, k.allCols)
-		}
-		if k.strict {
-			b.WriteString(fmt.Sprintf("%sfd%s", sep, cols))
-		} else {
-			b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols))
+	if len(f.keys) >= 2 {
+		for _, k := range f.keys[1:] {
+			var cols string
+			if k.allCols == f.all {
+				cols = k.cols.String()
+			} else {
+				cols = fmt.Sprintf("%s/%s", k.cols, k.allCols)
+			}
+			if k.strict {
+				b.WriteString(fmt.Sprintf("%sfd%s", sep, cols))
+			} else {
+				b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols))
+			}
+			sep = "; "
 		}
-		sep = "; "
 	}
 	return b.String()
 }
@@ -238,7 +235,8 @@ func (f *FuncDepSet) Constants() ColSet {
 }
 
 func (f *FuncDepSet) EquivalenceClosure(cols ColSet) ColSet {
-	for _, set := range f.equivs.Sets() {
+	equivSets := append(f.equivs.Sets(), f.partialEquivs.Sets()...)
+	for _, set := range equivSets {
 		if set.Intersects(cols) {
 			cols = cols.Union(set)
 		}
@@ -257,9 +255,6 @@ func (f *FuncDepSet) AddConstants(cols ColSet) {
 
 func (f *FuncDepSet) AddEquiv(i, j ColumnId) {
 	cols := NewColSet(i, j)
-	if f.equivs == nil {
-		f.equivs = &EquivSets{}
-	}
 	f.AddEquivSet(cols)
 }
 
@@ -276,6 +271,14 @@ func (f *FuncDepSet) AddEquivSet(cols ColSet) {
 	}
 }
 
+func (f *FuncDepSet) AddPartialEquiv(i, j ColumnId) {
+	cols := NewColSet(i, j)
+	if f.partialEquivs == nil {
+		f.partialEquivs = &EquivSets{}
+	}
+	f.partialEquivs.Add(cols)
+}
+
 func (f *FuncDepSet) AddKey(k Key) {
 	switch k.strict {
 	case true:
@@ -661,10 +664,17 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet
 		}
 		ret.AddConstants(leftConst)
 	}
-	// only left equiv holds
+
+	// add left equivs
 	for _, equiv := range left.equivs.Sets() {
 		ret.AddEquivSet(equiv)
 	}
+	// record a join filter as a partial equiv when at least one of its two columns is in the right side's not-null set
+	for _, f := range filters {
+		if right.notNull.Contains(f[0]) || right.notNull.Contains(f[1]) {
+			ret.AddPartialEquiv(f[0], f[1])
+		}
+	}
 
 	if leftStrict && leftColsAreInnerJoinKey {
 		strictKey := Key{strict: true, allCols: ret.all, cols: leftKey}
@@ -676,10 +686,6 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet
 		ret.keys = append(ret.keys, jKey)
 	}
 
-	// no filter equivs are valid
-	// TODO if right columns are non-nullable in ON filter, equivs hold
-	// technically we could do (r)~~>(l), but is this useful?
-
 	// right-side keys become lax unless all non-nullable in original
 	for _, key := range rightKeys {
 		if !key.cols.SubsetOf(right.notNull) {

diff --git a/sql/func_deps_test.go b/sql/func_deps_test.go
@@ -348,7 +348,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
 		join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{})
 		assert.Equal(t, "key(1,6,7); equiv(6,8,9); lax-fd(3)/(1-5)", join.String())
 	})
-	t.Run("join filter equiv", func(t *testing.T) {
+	t.Run("join filter partial equiv", func(t *testing.T) {
 		// SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m
 		abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
 		abcde.AddNotNullable(cols(1))
@@ -359,6 +359,19 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
 		mnpq.AddNotNullable(cols(6, 7))
 		mnpq.AddStrictKey(cols(6, 7))
 
+		join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}})
+		assert.Equal(t, "key(6,7); partialEquiv(1,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
+	})
+	t.Run("join filter no partial equiv", func(t *testing.T) {
+		// SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m, with no AddNotNullable on abcde, so no partial equiv is expected
+		abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
+		abcde.AddStrictKey(cols(1))
+		abcde.AddLaxKey(cols(2, 3))
+
+		mnpq := &FuncDepSet{all: cols(6, 7, 8, 9)}
+		mnpq.AddNotNullable(cols(6, 7))
+		mnpq.AddStrictKey(cols(6, 7))
+
 		join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}})
 		assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
 	})
@@ -374,7 +387,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
 		mnpq.AddStrictKey(cols(6, 7))
 
 		join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}})
-		assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
+		assert.Equal(t, "key(6,7); partialEquiv(1,2,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
 	})
 	t.Run("max1Row left join", func(t *testing.T) {
 		abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
@@ -390,7 +403,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
 		mnpq.AddStrictKey(cols(6, 7))
 
 		join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}})
-		assert.Equal(t, "key(); constant(1,6,7)", join.String())
+		assert.Equal(t, "key(); constant(1,6,7); partialEquiv(1,2,6)", join.String())
 	})
 }
 

diff --git a/sql/memo/coster.go b/sql/memo/coster.go
@@ -243,7 +243,7 @@ func lookupJoinSelectivity(l *IndexScan, joinBase *JoinBase) float64 {
 	return math.Pow(perKeyCostReductionFactor, float64(len(l.Table.Expressions()))) * optimisticJoinSel
 }
 
-// isInjectiveLookup returns whether every lookup with the given key expressions is guarenteed to return
+// isInjectiveLookup returns whether every lookup with the given key expressions is guaranteed to return
 // at most one row.
 func isInjectiveLookup(idx *Index, joinBase *JoinBase, keyExprs []sql.Expression, nullMask []bool) bool {
 	if !idx.SqlIdx().IsUnique() {

diff --git a/sql/memo/rel_props.go b/sql/memo/rel_props.go
@@ -326,20 +326,33 @@ func (m *Memo) CardMemoGroups(ctx *sql.Context, g *ExprGroup) {
 	g.RelProps.SetStats(s)
 }
 
+func estimatedCardinalityStats(jp *JoinBase) sql.Statistic {
+	left := jp.Left.RelProps.GetStats()
+	right := jp.Right.RelProps.GetStats()
+
+	distinct := math.Max(float64(left.DistinctCount()), float64(right.DistinctCount()))
+	if distinct == 0 {
+		m := math.Max(float64(left.RowCount()), float64(right.RowCount()))
+		distinct = m * .80
+	}
+
+	// Assume that the smaller set is surjective onto the larger set, and at least one of the sets is uniformly distributed.
+	// If so, the probability that a random pair of rows (one from each side) matches is 1/distinct:
+	selectivity := 1.0 / float64(distinct)
+	card := uint64(float64(left.RowCount()*right.RowCount()) * selectivity)
+	return &stats.Statistic{RowCnt: card}
+}
+
 func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
 	m.Tracer.PushDebugContext("statsForRel")
 	defer m.Tracer.PopDebugContext()
 
 	var stat sql.Statistic
 	switch rel := rel.(type) {
 	case JoinRel:
-		// different joins use different ways to estimate cardinality of outputs
-		jp := rel.JoinPrivate()
-		left := jp.Left.RelProps.GetStats()
-		right := jp.Right.RelProps.GetStats()
-
+		estimatedCardStats := estimatedCardinalityStats(rel.JoinPrivate())
+		smallestLeft := estimatedCardStats
 		var injective bool
-		var smallestLeft sql.Statistic
 		var mergeStats sql.Statistic
 		var n RelExpr = rel
 		var done bool
@@ -348,7 +361,7 @@ func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
 			case *LookupJoin:
 				if n.Injective {
 					injective = true
-					if smallestLeft == nil || n.Left.RelProps.GetStats().RowCount() < smallestLeft.RowCount() {
+					if n.Left.RelProps.GetStats().RowCount() < smallestLeft.RowCount() {
 						smallestLeft = n.Left.RelProps.GetStats()
 					}
 				}
@@ -397,18 +410,7 @@ func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
 			return mergeStats
 		}
 
-		distinct := math.Max(float64(left.DistinctCount()), float64(right.DistinctCount()))
-		if distinct == 0 {
-			m := math.Max(float64(left.RowCount()), float64(right.RowCount()))
-			distinct = m * .80
-		}
-
-		// Assume that the smaller set is surjective onto the larger set, and at least one of the sets is uniformly distributed.
-		// If so, then the odds that a random element of each set matches can be computed as:
-		selectivity := 1.0 / float64(distinct)
-		card := float64(left.RowCount()*right.RowCount()) * selectivity
-		return &stats.Statistic{RowCnt: uint64(card)}
-
+		return estimatedCardStats
 	case *Max1Row:
 		stat = &stats.Statistic{RowCnt: 1}