Skip to content

Add pushdown on inner join that equals should not be null #811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,56 +23,115 @@ namespace FlowtideDotNet.Core.Optimizer.FilterPushdown
/// </summary>
internal class JoinFilterPushdownVisitor : OptimizerBaseVisitor
{
private static void TestPushdownNotNull(JoinRelation joinRelation, Expression expression, List<Expression> leftPushdowns, List<Expression> rightPushdowns)
{
if ((joinRelation.Type == JoinType.Inner || joinRelation.Type == JoinType.Left || joinRelation.Type == JoinType.Right) &&
MergeJoinFindVisitor.Check(joinRelation, expression, out var leftKey, out var rightKey))
{
if (joinRelation.Type == JoinType.Inner || joinRelation.Type == JoinType.Right)
{
if (leftKey.ReferenceSegment is StructReferenceSegment leftStruct)
{
leftPushdowns.Add(new ScalarFunction()
{
Arguments = new List<Expression>()
{
new DirectFieldReference()
{
ReferenceSegment = new StructReferenceSegment()
{
Field = leftStruct.Field,
Child = leftStruct.Child
}
}
},
ExtensionUri = FunctionsComparison.Uri,
ExtensionName = FunctionsComparison.IsNotNull
});
}
}

if (joinRelation.Type == JoinType.Inner || joinRelation.Type == JoinType.Left)
{
if (rightKey.ReferenceSegment is StructReferenceSegment rightStruct)
{
rightPushdowns.Add(new ScalarFunction()
{
Arguments = new List<Expression>()
{
new DirectFieldReference()
{
ReferenceSegment = new StructReferenceSegment()
{
Field = rightStruct.Field,
Child = rightStruct.Child
}
}
},
ExtensionUri = FunctionsComparison.Uri,
ExtensionName = FunctionsComparison.IsNotNull
});
}
}
}
}

public override Relation VisitJoinRelation(JoinRelation joinRelation, object state)
{
// Check root expression
var visitor = new JoinExpressionVisitor(joinRelation.Left.OutputLength);
visitor.Visit(joinRelation.Expression!, state);

List<Expression> leftPushdowns = new List<Expression>();
List<Expression> rightPushdowns = new List<Expression>();

if (joinRelation.Expression != null)
{
TestPushdownNotNull(joinRelation, joinRelation.Expression, leftPushdowns, rightPushdowns);

if (joinRelation.Expression is ScalarFunction scalarFunc &&
scalarFunc.ExtensionUri == FunctionsBoolean.Uri && scalarFunc.ExtensionName == FunctionsBoolean.And)
{
for (int i = 0; i < scalarFunc.Arguments.Count; i++)
{
TestPushdownNotNull(joinRelation, scalarFunc.Arguments[i], leftPushdowns, rightPushdowns);
}
}
}

if (!visitor.unknownCase)
{
// Only fields from left is used
if (visitor.fieldInLeft && !visitor.fieldInRight && joinRelation.Type == JoinType.Inner)
{
joinRelation.Left = new FilterRelation()
{
Condition = joinRelation.Expression!,
Input = joinRelation.Left
};
joinRelation.Expression = new BoolLiteral() { Value = true };
leftPushdowns.Add(new BoolLiteral() { Value = true });
}
// Only field in right is used
else if (!visitor.fieldInLeft && visitor.fieldInRight && joinRelation.Type == JoinType.Inner)
{
joinRelation.Right = new FilterRelation()
{
Condition = joinRelation.Expression!,
Input = joinRelation.Right
};
joinRelation.Expression = new BoolLiteral() { Value = true };
rightPushdowns.Add(new BoolLiteral() { Value = true });
}
}

if (joinRelation.Expression is ScalarFunction andFunctionScalar &&
andFunctionScalar.ExtensionUri == FunctionsBoolean.Uri &&
andFunctionScalar.ExtensionName == FunctionsBoolean.And)
{
List<Expression> leftPushDown = new List<Expression>();
List<Expression> rightPushDown = new List<Expression>();
for (int i = 0; i < andFunctionScalar.Arguments.Count; i++)
{
var expr = andFunctionScalar.Arguments[i];
var andVisitor = new JoinExpressionVisitor(joinRelation.Left.OutputLength);
andVisitor.Visit(expr, state);
if (andVisitor.fieldInLeft && !andVisitor.fieldInRight && joinRelation.Type == JoinType.Inner)
{
leftPushDown.Add(expr);
leftPushdowns.Add(expr);
andFunctionScalar.Arguments.RemoveAt(i);
i--;
}
// Only field in right is used
else if (!andVisitor.fieldInLeft && andVisitor.fieldInRight && joinRelation.Type == JoinType.Inner)
{
rightPushDown.Add(expr);
rightPushdowns.Add(expr);
andFunctionScalar.Arguments.RemoveAt(i);
i--;
}
Expand All @@ -85,64 +144,65 @@ public override Relation VisitJoinRelation(JoinRelation joinRelation, object sta
joinRelation.Expression = new BoolLiteral() { Value = true };
}
}
if (leftPushDown.Count > 0)
{
}

if (leftPushDown.Count == 1)
{
joinRelation.Left = new FilterRelation()
{
Condition = leftPushDown[0],
Input = joinRelation.Left
};
}
else
if (leftPushdowns.Count > 0)
{

if (leftPushdowns.Count == 1)
{
joinRelation.Left = new FilterRelation()
{
joinRelation.Left = new FilterRelation()
{
Condition = new ScalarFunction() { ExtensionUri = FunctionsBoolean.Uri, ExtensionName = FunctionsBoolean.And, Arguments = leftPushDown },
Input = joinRelation.Left
};
}
Condition = leftPushdowns[0],
Input = joinRelation.Left
};
}
if (rightPushDown.Count > 0)
else
{
// Find used fields
var usageVisitor = new ExpressionFieldUsageVisitor(joinRelation.Left.OutputLength);
foreach (var expr in rightPushDown)
joinRelation.Left = new FilterRelation()
{
usageVisitor.Visit(expr, default);
}
var rightUsageFields = usageVisitor.UsedFieldsRight.Distinct().ToList();
Condition = new ScalarFunction() { ExtensionUri = FunctionsBoolean.Uri, ExtensionName = FunctionsBoolean.And, Arguments = leftPushdowns },
Input = joinRelation.Left
};
}
}
if (rightPushdowns.Count > 0)
{
// Find used fields
var usageVisitor = new ExpressionFieldUsageVisitor(joinRelation.Left.OutputLength);
foreach (var expr in rightPushdowns)
{
usageVisitor.Visit(expr, default);
}
var rightUsageFields = usageVisitor.UsedFieldsRight.Distinct().ToList();

// Build lookup table from old to new field id
Dictionary<int, int> oldToNew = new Dictionary<int, int>();
foreach (var usedField in rightUsageFields)
{
oldToNew.Add(usedField, usedField - joinRelation.Left.OutputLength);
}
// Replace old ids with the new ids
var replaceVisitor = new ExpressionFieldReplaceVisitor(oldToNew);
foreach (var expr in rightPushDown)
{
replaceVisitor.Visit(expr, default);
}
if (rightPushDown.Count == 1)
// Build lookup table from old to new field id
Dictionary<int, int> oldToNew = new Dictionary<int, int>();
foreach (var usedField in rightUsageFields)
{
oldToNew.Add(usedField, usedField - joinRelation.Left.OutputLength);
}
// Replace old ids with the new ids
var replaceVisitor = new ExpressionFieldReplaceVisitor(oldToNew);
foreach (var expr in rightPushdowns)
{
replaceVisitor.Visit(expr, default);
}
if (rightPushdowns.Count == 1)
{
joinRelation.Right = new FilterRelation()
{
joinRelation.Right = new FilterRelation()
{
Condition = rightPushDown[0],
Input = joinRelation.Right
};
}
else
Condition = rightPushdowns[0],
Input = joinRelation.Right
};
}
else
{
joinRelation.Right = new FilterRelation()
{
joinRelation.Right = new FilterRelation()
{
Condition = new ScalarFunction() { ExtensionUri = FunctionsBoolean.Uri, ExtensionName = FunctionsBoolean.And, Arguments = rightPushDown },
Input = joinRelation.Right
};
}
Condition = new ScalarFunction() { ExtensionUri = FunctionsBoolean.Uri, ExtensionName = FunctionsBoolean.And, Arguments = rightPushdowns },
Input = joinRelation.Right
};
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/FlowtideDotNet.Core/Optimizer/MergeJoinFindVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace FlowtideDotNet.Core.Optimizer
{
internal class MergeJoinFindVisitor : OptimizerBaseVisitor
{
private bool Check(JoinRelation joinRelation, Expression? expression, [NotNullWhen(true)] out DirectFieldReference? leftKey, [NotNullWhen(true)] out DirectFieldReference? rightKey)
internal static bool Check(JoinRelation joinRelation, Expression? expression, [NotNullWhen(true)] out DirectFieldReference? leftKey, [NotNullWhen(true)] out DirectFieldReference? rightKey)
{
if (expression is ScalarFunction booleanComparison &&
booleanComparison.ExtensionUri == FunctionsComparison.Uri &&
Expand Down
Loading