Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,917 changes: 1,469 additions & 1,448 deletions enginetest/queries/imdb_plans.go

Large diffs are not rendered by default.

7,556 changes: 3,434 additions & 4,122 deletions enginetest/queries/integration_plans.go

Large diffs are not rendered by default.

1,861 changes: 835 additions & 1,026 deletions enginetest/queries/query_plans.go

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71 h1:bMGS25NWAGTE
github.com/dolthub/jsonpath v0.0.2-0.20240227200619-19675ab05c71/go.mod h1:2/2zjLQ/JOOSbbSboojeg+cAwcRV0fDLzIiWch/lhqI=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
github.com/dolthub/vitess v0.0.0-20250930230441-70c2c6a98e33 h1:ScHTwNbcVC6JH1OSyXzj8S4w67BIpRXwTSjrac3/PSw=
github.com/dolthub/vitess v0.0.0-20250930230441-70c2c6a98e33/go.mod h1:8pvvk5OLaLN9LLxghyczUapn/97l+mBgIb10qC1LG84=
github.com/dolthub/vitess v0.0.0-20251031205214-d09b65bd77b0 h1:RXopPQP1bwb5fsnXAC89joqk/3pIgQnQSU8lAHJhue0=
github.com/dolthub/vitess v0.0.0-20251031205214-d09b65bd77b0/go.mod h1:FLWqdXsAeeBQyFwDjmBVu0GnbjI2MKeRf3tRVdJEKlI=
github.com/dolthub/vitess v0.0.0-20251105091622-b08b393fd9b1 h1:2uiHo4gkf2n/Cw9uCBDkCWj35Vz48Uhif2B9P+DqgCg=
Expand Down
80 changes: 43 additions & 37 deletions sql/func_deps.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@ func (e *EquivSets) Sets() []ColSet {
}

// String renders the equivalence sets using the default "equiv" label by
// delegating to StringWithLabel.
func (e *EquivSets) String() string {
return e.StringWithLabel("equiv")
}

func (e *EquivSets) StringWithLabel(label string) string {
if e == nil {
return "equiv()"
return fmt.Sprintf("%s()", label)
}
b := strings.Builder{}
sep := ""
for i, set := range e.sets {
b.WriteString(fmt.Sprintf("%sequiv%s", sep, set))
b.WriteString(fmt.Sprintf("%s%s%s", sep, label, set))
if i == 0 {
sep = "; "
}
Expand Down Expand Up @@ -102,18 +106,6 @@ func (k *Key) implies(other Key) bool {
// a fraction of the total input set. The first key always determines
// the entire relation, which seems good enough for many cases.
// Maintaining partials sets also requires much less bookkeeping.
//
// TODO: We used to not track dependency sets and only add keys that
// determined the entire relation. One observed downside of that approach
// is that left joins fail to convert equivalencies on the null-extended
// side to lax functional dependencies. For example, in the query below,
// the left join loses (a) == (m) because (m) can now be NULL:
//
// SELECT * from adbcd LEFT_JOIN mnpq WHERE a = m
//
// But we could maintain (m)~~>(n), which higher-level null enforcement
// (ex: GROUPING) can reclaim as equivalence. Although we now track partial
// dependency sets, this may still not be supported.
type FuncDepSet struct {
// all columns in this relation
all ColSet
Expand All @@ -123,6 +115,8 @@ type FuncDepSet struct {
consts ColSet
// tracks in-scope equivalent closure
equivs *EquivSets
// tracks partial equivalent closure. This is used for left joins, where the right side is null-extended
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the comment needs to be more precise about what "partial equivalent closure" means.

I also think that we might want to reconsider the name, both to make it more clear but also because the comments in this section also talk about "partial keys" (aka sets of columns that constrain some but not all of the other columns) and these are two unrelated concepts that we don't want to be potentially confused with each other.

partialEquivs *EquivSets
// keys includes the set of primary and secondary keys
// accumulated in the relation. The first key is the best
// key we have seen so far, where strict > lax and shorter
Expand Down Expand Up @@ -213,22 +207,25 @@ func (f *FuncDepSet) String() string {
b.WriteString(fmt.Sprintf("%s%s", sep, f.equivs))
sep = "; "
}
if len(f.keys) < 2 {
return b.String()
if f.partialEquivs.Len() > 0 {
b.WriteString(fmt.Sprintf("%s%s", sep, f.partialEquivs.StringWithLabel("partialEquiv")))
sep = "; "
}
for _, k := range f.keys[1:] {
var cols string
if k.allCols == f.all {
cols = k.cols.String()
} else {
cols = fmt.Sprintf("%s/%s", k.cols, k.allCols)
}
if k.strict {
b.WriteString(fmt.Sprintf("%sfd%s", sep, cols))
} else {
b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols))
if len(f.keys) >= 2 {
for _, k := range f.keys[1:] {
var cols string
if k.allCols == f.all {
cols = k.cols.String()
} else {
cols = fmt.Sprintf("%s/%s", k.cols, k.allCols)
}
if k.strict {
b.WriteString(fmt.Sprintf("%sfd%s", sep, cols))
} else {
b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols))
}
sep = "; "
}
sep = "; "
}
return b.String()
}
Expand All @@ -238,7 +235,8 @@ func (f *FuncDepSet) Constants() ColSet {
}

func (f *FuncDepSet) EquivalenceClosure(cols ColSet) ColSet {
for _, set := range f.equivs.Sets() {
equivSets := append(f.equivs.Sets(), f.partialEquivs.Sets()...)
for _, set := range equivSets {
if set.Intersects(cols) {
cols = cols.Union(set)
}
Expand All @@ -257,9 +255,6 @@ func (f *FuncDepSet) AddConstants(cols ColSet) {

func (f *FuncDepSet) AddEquiv(i, j ColumnId) {
cols := NewColSet(i, j)
if f.equivs == nil {
f.equivs = &EquivSets{}
}
f.AddEquivSet(cols)
}

Expand All @@ -276,6 +271,14 @@ func (f *FuncDepSet) AddEquivSet(cols ColSet) {
}
}

// AddPartialEquiv records the column pair (i, j) in the partial equivalence
// sets of f, allocating the underlying EquivSets on first use.
func (f *FuncDepSet) AddPartialEquiv(i, j ColumnId) {
	if f.partialEquivs == nil {
		// Lazily create the container so a zero-value FuncDepSet is usable.
		f.partialEquivs = &EquivSets{}
	}
	f.partialEquivs.Add(NewColSet(i, j))
}

func (f *FuncDepSet) AddKey(k Key) {
switch k.strict {
case true:
Expand Down Expand Up @@ -661,10 +664,17 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet
}
ret.AddConstants(leftConst)
}
// only left equiv holds

// add left equivs
for _, equiv := range left.equivs.Sets() {
ret.AddEquivSet(equiv)
}
// add partial equiv filters if right-side column is not nullable
for _, f := range filters {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Be careful, filters might be more than just equality checks. We don't want to accidentally add an equivalence if there's a filter like left.x > right.y. We should make sure there's a test that detects this, and add one if there isn't.

if right.notNull.Contains(f[0]) || right.notNull.Contains(f[1]) {
ret.AddPartialEquiv(f[0], f[1])
}
}

if leftStrict && leftColsAreInnerJoinKey {
strictKey := Key{strict: true, allCols: ret.all, cols: leftKey}
Expand All @@ -676,10 +686,6 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet
ret.keys = append(ret.keys, jKey)
}

// no filter equivs are valid
// TODO if right columns are non-nullable in ON filter, equivs hold
// technically we could do (r)~~>(l), but is this useful?

// right-side keys become lax unless all non-nullable in original
for _, key := range rightKeys {
if !key.cols.SubsetOf(right.notNull) {
Expand Down
19 changes: 16 additions & 3 deletions sql/func_deps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{})
assert.Equal(t, "key(1,6,7); equiv(6,8,9); lax-fd(3)/(1-5)", join.String())
})
t.Run("join filter equiv", func(t *testing.T) {
t.Run("join filter partial equiv", func(t *testing.T) {
// SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m
abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
abcde.AddNotNullable(cols(1))
Expand All @@ -359,6 +359,19 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
mnpq.AddNotNullable(cols(6, 7))
mnpq.AddStrictKey(cols(6, 7))

join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}})
assert.Equal(t, "key(6,7); partialEquiv(1,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
})
t.Run("join filter no partial equiv", func(t *testing.T) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a new func deps test is good. I think we also want a new plan test based on plans that we expect to be optimized by this but weren't before.

// SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m
abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
abcde.AddStrictKey(cols(1))
abcde.AddLaxKey(cols(2, 3))

mnpq := &FuncDepSet{all: cols(6, 7, 8, 9)}
mnpq.AddNotNullable(cols(6, 7))
mnpq.AddStrictKey(cols(6, 7))

join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}})
assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
})
Expand All @@ -374,7 +387,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
mnpq.AddStrictKey(cols(6, 7))

join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}})
assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
assert.Equal(t, "key(6,7); partialEquiv(1,2,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String())
})
t.Run("max1Row left join", func(t *testing.T) {
abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)}
Expand All @@ -390,7 +403,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) {
mnpq.AddStrictKey(cols(6, 7))

join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}})
assert.Equal(t, "key(); constant(1,6,7)", join.String())
assert.Equal(t, "key(); constant(1,6,7); partialEquiv(1,2,6)", join.String())
})
}

Expand Down
2 changes: 1 addition & 1 deletion sql/memo/coster.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func lookupJoinSelectivity(l *IndexScan, joinBase *JoinBase) float64 {
return math.Pow(perKeyCostReductionFactor, float64(len(l.Table.Expressions()))) * optimisticJoinSel
}

// isInjectiveLookup returns whether every lookup with the given key expressions is guarenteed to return
// isInjectiveLookup returns whether every lookup with the given key expressions is guaranteed to return
// at most one row.
func isInjectiveLookup(idx *Index, joinBase *JoinBase, keyExprs []sql.Expression, nullMask []bool) bool {
if !idx.SqlIdx().IsUnique() {
Expand Down
40 changes: 21 additions & 19 deletions sql/memo/rel_props.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,20 +326,33 @@ func (m *Memo) CardMemoGroups(ctx *sql.Context, g *ExprGroup) {
g.RelProps.SetStats(s)
}

// estimatedCardinalityStats returns a statistic carrying only an estimated row
// count for the join described by jp.
//
// The estimate is rows(left) * rows(right) / distinct, where distinct is the
// larger of the two inputs' distinct counts. When neither input reports a
// distinct count, 80% of the larger input's row count is used in its place.
// If that fallback is also zero the estimate is zero rows.
func estimatedCardinalityStats(jp *JoinBase) sql.Statistic {
	left := jp.Left.RelProps.GetStats()
	right := jp.Right.RelProps.GetStats()

	leftRows := float64(left.RowCount())
	rightRows := float64(right.RowCount())

	distinct := math.Max(float64(left.DistinctCount()), float64(right.DistinctCount()))
	if distinct == 0 {
		distinct = math.Max(leftRows, rightRows) * .80
	}
	if distinct == 0 {
		// Both inputs report no rows. Dividing by zero here would yield
		// 0 * +Inf = NaN, and converting NaN to an integer is
		// implementation-dependent, so return the empty estimate directly.
		return &stats.Statistic{RowCnt: 0}
	}

	// Assume that the smaller set is surjective onto the larger set, and at least one of the sets is uniformly distributed.
	// If so, then the odds that a random element of each set matches can be computed as:
	selectivity := 1.0 / distinct
	// Multiply in floating point: the integer product of two large row counts
	// can silently wrap around before it is converted.
	card := uint64(leftRows * rightRows * selectivity)
	return &stats.Statistic{RowCnt: card}
}

func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
m.Tracer.PushDebugContext("statsForRel")
defer m.Tracer.PopDebugContext()

var stat sql.Statistic
switch rel := rel.(type) {
case JoinRel:
// different joins use different ways to estimate cardinality of outputs
jp := rel.JoinPrivate()
left := jp.Left.RelProps.GetStats()
right := jp.Right.RelProps.GetStats()

estimatedCardStats := estimatedCardinalityStats(rel.JoinPrivate())
smallestLeft := estimatedCardStats
var injective bool
var smallestLeft sql.Statistic
var mergeStats sql.Statistic
var n RelExpr = rel
var done bool
Expand All @@ -348,7 +361,7 @@ func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
case *LookupJoin:
if n.Injective {
injective = true
if smallestLeft == nil || n.Left.RelProps.GetStats().RowCount() < smallestLeft.RowCount() {
if n.Left.RelProps.GetStats().RowCount() < smallestLeft.RowCount() {
smallestLeft = n.Left.RelProps.GetStats()
}
}
Expand Down Expand Up @@ -397,18 +410,7 @@ func (m *Memo) statsForRel(ctx *sql.Context, rel RelExpr) sql.Statistic {
return mergeStats
}

distinct := math.Max(float64(left.DistinctCount()), float64(right.DistinctCount()))
if distinct == 0 {
m := math.Max(float64(left.RowCount()), float64(right.RowCount()))
distinct = m * .80
}

// Assume that the smaller set is surjective onto the larger set, and at least one of the sets is uniformly distributed.
// If so, then the odds that a random element of each set matches can be computed as:
selectivity := 1.0 / float64(distinct)
card := float64(left.RowCount()*right.RowCount()) * selectivity
return &stats.Statistic{RowCnt: uint64(card)}

return estimatedCardStats
case *Max1Row:
stat = &stats.Statistic{RowCnt: 1}

Expand Down
Loading