-
-
Notifications
You must be signed in to change notification settings - Fork 238
Add partial equivs to left join functional dependencies if right-side column is not nullable #3288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4519b72
06b17bd
637c3f1
a5eef0f
393ab2f
149d319
cbda6af
9881365
f784ebd
cadd8ca
69dd180
1c117ef
95289f1
cac6646
e9af3e2
5e5d6af
de5f1c2
9462c1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,13 +43,17 @@ func (e *EquivSets) Sets() []ColSet { | |
| } | ||
|
|
||
| func (e *EquivSets) String() string { | ||
| return e.StringWithLabel("equiv") | ||
| } | ||
|
|
||
| func (e *EquivSets) StringWithLabel(label string) string { | ||
| if e == nil { | ||
| return "equiv()" | ||
| return fmt.Sprintf("%s()", label) | ||
| } | ||
| b := strings.Builder{} | ||
| sep := "" | ||
| for i, set := range e.sets { | ||
| b.WriteString(fmt.Sprintf("%sequiv%s", sep, set)) | ||
| b.WriteString(fmt.Sprintf("%s%s%s", sep, label, set)) | ||
| if i == 0 { | ||
| sep = "; " | ||
| } | ||
|
|
@@ -102,18 +106,6 @@ func (k *Key) implies(other Key) bool { | |
| // a fraction of the total input set. The first key always determines | ||
| // the entire relation, which seems good enough for many cases. | ||
| // Maintaining partials sets also requires much less bookkeeping. | ||
| // | ||
| // TODO: We used to not track dependency sets and only add keys that | ||
| // determined the entire relation. One observed downside of that approach | ||
| // is that left joins fail to convert equivalencies on the null-extended | ||
| // side to lax functional dependencies. For example, in the query below, | ||
| // the left join loses (a) == (m) because (m) can now be NULL: | ||
| // | ||
| // SELECT * from adbcd LEFT_JOIN mnpq WHERE a = m | ||
| // | ||
| // But we could maintain (m)~~>(n), which higher-level null enforcement | ||
| // (ex: GROUPING) can reclaim as equivalence. Although we now track partial | ||
| // dependency sets, this may still not be supported. | ||
| type FuncDepSet struct { | ||
| // all columns in this relation | ||
| all ColSet | ||
|
|
@@ -123,6 +115,8 @@ type FuncDepSet struct { | |
| consts ColSet | ||
| // tracks in-scope equivalent closure | ||
| equivs *EquivSets | ||
| // tracks partial equivalent closure. This is used for left joins, where the right side is null-extended | ||
| partialEquivs *EquivSets | ||
| // keys includes the set of primary and secondary keys | ||
| // accumulated in the relation. The first key is the best | ||
| // key we have seen so far, where strict > lax and shorter | ||
|
|
@@ -213,22 +207,25 @@ func (f *FuncDepSet) String() string { | |
| b.WriteString(fmt.Sprintf("%s%s", sep, f.equivs)) | ||
| sep = "; " | ||
| } | ||
| if len(f.keys) < 2 { | ||
| return b.String() | ||
| if f.partialEquivs.Len() > 0 { | ||
| b.WriteString(fmt.Sprintf("%s%s", sep, f.partialEquivs.StringWithLabel("partialEquiv"))) | ||
| sep = "; " | ||
| } | ||
| for _, k := range f.keys[1:] { | ||
| var cols string | ||
| if k.allCols == f.all { | ||
| cols = k.cols.String() | ||
| } else { | ||
| cols = fmt.Sprintf("%s/%s", k.cols, k.allCols) | ||
| } | ||
| if k.strict { | ||
| b.WriteString(fmt.Sprintf("%sfd%s", sep, cols)) | ||
| } else { | ||
| b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols)) | ||
| if len(f.keys) >= 2 { | ||
| for _, k := range f.keys[1:] { | ||
| var cols string | ||
| if k.allCols == f.all { | ||
| cols = k.cols.String() | ||
| } else { | ||
| cols = fmt.Sprintf("%s/%s", k.cols, k.allCols) | ||
| } | ||
| if k.strict { | ||
| b.WriteString(fmt.Sprintf("%sfd%s", sep, cols)) | ||
| } else { | ||
| b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols)) | ||
| } | ||
| sep = "; " | ||
| } | ||
| sep = "; " | ||
| } | ||
| return b.String() | ||
| } | ||
|
|
@@ -238,7 +235,8 @@ func (f *FuncDepSet) Constants() ColSet { | |
| } | ||
|
|
||
| func (f *FuncDepSet) EquivalenceClosure(cols ColSet) ColSet { | ||
| for _, set := range f.equivs.Sets() { | ||
| equivSets := append(f.equivs.Sets(), f.partialEquivs.Sets()...) | ||
| for _, set := range equivSets { | ||
| if set.Intersects(cols) { | ||
| cols = cols.Union(set) | ||
| } | ||
|
|
@@ -257,9 +255,6 @@ func (f *FuncDepSet) AddConstants(cols ColSet) { | |
|
|
||
| func (f *FuncDepSet) AddEquiv(i, j ColumnId) { | ||
| cols := NewColSet(i, j) | ||
| if f.equivs == nil { | ||
| f.equivs = &EquivSets{} | ||
| } | ||
| f.AddEquivSet(cols) | ||
| } | ||
|
|
||
|
|
@@ -276,6 +271,14 @@ func (f *FuncDepSet) AddEquivSet(cols ColSet) { | |
| } | ||
| } | ||
|
|
||
| func (f *FuncDepSet) AddPartialEquiv(i, j ColumnId) { | ||
| cols := NewColSet(i, j) | ||
| if f.partialEquivs == nil { | ||
| f.partialEquivs = &EquivSets{} | ||
| } | ||
| f.partialEquivs.Add(cols) | ||
| } | ||
|
|
||
| func (f *FuncDepSet) AddKey(k Key) { | ||
| switch k.strict { | ||
| case true: | ||
|
|
@@ -661,10 +664,17 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet | |
| } | ||
| ret.AddConstants(leftConst) | ||
| } | ||
| // only left equiv holds | ||
|
|
||
| // add left equivs | ||
| for _, equiv := range left.equivs.Sets() { | ||
| ret.AddEquivSet(equiv) | ||
| } | ||
| // add partial equiv filters if right-side column is not nullable | ||
| for _, f := range filters { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Be careful, filters might be more than just equality checks. We don't want to accidentally add an vigilance if there's a filter like |
||
| if right.notNull.Contains(f[0]) || right.notNull.Contains(f[1]) { | ||
| ret.AddPartialEquiv(f[0], f[1]) | ||
| } | ||
| } | ||
|
|
||
| if leftStrict && leftColsAreInnerJoinKey { | ||
| strictKey := Key{strict: true, allCols: ret.all, cols: leftKey} | ||
|
|
@@ -676,10 +686,6 @@ func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet | |
| ret.keys = append(ret.keys, jKey) | ||
| } | ||
|
|
||
| // no filter equivs are valid | ||
| // TODO if right columns are non-nullable in ON filter, equivs hold | ||
| // technically we could do (r)~~>(l), but is this useful? | ||
|
|
||
| // right-side keys become lax unless all non-nullable in original | ||
| for _, key := range rightKeys { | ||
| if !key.cols.SubsetOf(right.notNull) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -348,7 +348,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) { | |
| join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{}) | ||
| assert.Equal(t, "key(1,6,7); equiv(6,8,9); lax-fd(3)/(1-5)", join.String()) | ||
| }) | ||
| t.Run("join filter equiv", func(t *testing.T) { | ||
| t.Run("join filter partial equiv", func(t *testing.T) { | ||
| // SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m | ||
| abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)} | ||
| abcde.AddNotNullable(cols(1)) | ||
|
|
@@ -359,6 +359,19 @@ func TestFuncDeps_LeftJoin(t *testing.T) { | |
| mnpq.AddNotNullable(cols(6, 7)) | ||
| mnpq.AddStrictKey(cols(6, 7)) | ||
|
|
||
| join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}}) | ||
| assert.Equal(t, "key(6,7); partialEquiv(1,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String()) | ||
| }) | ||
| t.Run("join filter no partial equiv", func(t *testing.T) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having a new func deps test is good. I think we also want a new plan test based on plans that we expect to be optimized by this but weren't before. |
||
| // SELECT * FROM abcde RIGHT OUTER JOIN mnpq ON a=m | ||
| abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)} | ||
| abcde.AddStrictKey(cols(1)) | ||
| abcde.AddLaxKey(cols(2, 3)) | ||
|
|
||
| mnpq := &FuncDepSet{all: cols(6, 7, 8, 9)} | ||
| mnpq.AddNotNullable(cols(6, 7)) | ||
| mnpq.AddStrictKey(cols(6, 7)) | ||
|
|
||
| join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}}) | ||
| assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String()) | ||
| }) | ||
|
|
@@ -374,7 +387,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) { | |
| mnpq.AddStrictKey(cols(6, 7)) | ||
|
|
||
| join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}}) | ||
| assert.Equal(t, "key(6,7); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String()) | ||
| assert.Equal(t, "key(6,7); partialEquiv(1,2,6); fd(1)/(1-5); lax-fd(2,3)/(1-5)", join.String()) | ||
| }) | ||
| t.Run("max1Row left join", func(t *testing.T) { | ||
| abcde := &FuncDepSet{all: cols(1, 2, 3, 4, 5)} | ||
|
|
@@ -390,7 +403,7 @@ func TestFuncDeps_LeftJoin(t *testing.T) { | |
| mnpq.AddStrictKey(cols(6, 7)) | ||
|
|
||
| join := NewLeftJoinFDs(mnpq, abcde, [][2]ColumnId{{1, 6}, {1, 2}}) | ||
| assert.Equal(t, "key(); constant(1,6,7)", join.String()) | ||
| assert.Equal(t, "key(); constant(1,6,7); partialEquiv(1,2,6)", join.String()) | ||
| }) | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the comment needs to be more precise about what "partial equivalent closure" means.
I also think that we might want to reconsider the name, both to make it more clear but also because the comments in this section also talk about "partial keys" (aka sets of columns that constrain some but not all of the other columns) and these are two unrelated concepts that we don't want to be potentially confused with each other.