Skip to content

feat: support Substrait 0.3.0 #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions rs/src/input/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,30 @@ impl<T: ProtoPrimitive> InputNode for T {
}
}

/// Lets the unit type stand in for the `google.protobuf.Empty` message.
impl InputNode for () {
    /// Builds the node describing the `google.protobuf.Empty` message type.
    fn type_to_node() -> tree::Node {
        tree::NodeType::ProtoMessage("google.protobuf.Empty").into()
    }

    /// Builds the node for a `()` value. The unit type carries no data, so
    /// the node is the same one that `type_to_node()` produces.
    fn data_to_node(&self) -> tree::Node {
        <Self as InputNode>::type_to_node()
    }

    /// `()` never reports a oneof variant.
    fn oneof_variant(&self) -> Option<&'static str> {
        None
    }

    /// Always returns `false`; the context is left untouched.
    fn parse_unknown(&self, _context: &mut context::Context<'_>) -> bool {
        false
    }
}

/// Marks the unit type as the protobuf message `google.protobuf.Empty`.
impl ProtoMessage for () {
    /// Returns the protobuf message type name reported for `()`.
    fn proto_message_type() -> &'static str {
        "google.protobuf.Empty"
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
6 changes: 6 additions & 0 deletions rs/src/output/diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ pub enum Classification {
#[strum(props(Description = "illegal glob"))]
IllegalGlob = 5,

#[strum(props(Description = "deprecation"))]
Deprecation = 6,

#[strum(props(HiddenDescription = "experimental"))]
Experimental = 999,

Expand Down Expand Up @@ -231,6 +234,9 @@ pub enum Classification {
#[strum(props(Description = "failed to resolve type variation name"))]
LinkMissingTypeVariationName = 3004,

#[strum(props(HiddenDescription = "use of anchor zero"))]
LinkAnchorZero = 3005,

// Type-related diagnostics (group 4).
#[strum(props(HiddenDescription = "type-related diagnostics"))]
Type = 4000,
Expand Down
36 changes: 19 additions & 17 deletions rs/src/parse/expressions/conditionals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ pub fn parse_if_then(

// Save to the "arguments" of the function we'll use to describe this
// expression.
args.push(condition);
args.push(value);
args.push(condition.into());
args.push(value.into());

Ok(())
});
Expand All @@ -76,14 +76,14 @@ pub fn parse_if_then(

// Save to the "arguments" of the function we'll use to describe this
// expression.
args.push(value);
args.push(value.into());
} else {
// Allow missing else, making the type nullable.
comment!(y, "Otherwise, yield null.");
return_type = return_type.make_nullable();

// Yield null for the else clause.
args.push(expressions::Expression::new_null(return_type.clone()));
args.push(expressions::Expression::new_null(return_type.clone()).into());
}

// Describe node.
Expand All @@ -110,7 +110,7 @@ pub fn parse_switch(
// Parse value to match.
let (n, e) = proto_boxed_required_field!(x, y, r#match, expressions::parse_expression);
let mut match_type = n.data_type();
args.push(e.unwrap_or_default());
args.push(e.unwrap_or_default().into());

// Handle branches.
proto_required_repeated_field!(x, y, ifs, |x, y| {
Expand Down Expand Up @@ -143,8 +143,8 @@ pub fn parse_switch(

// Save to the "arguments" of the function we'll use to describe this
// expression.
args.push(match_value.into());
args.push(value);
args.push(expressions::Expression::from(match_value).into());
args.push(value.into());

Ok(())
});
Expand All @@ -165,14 +165,14 @@ pub fn parse_switch(

// Save to the "arguments" of the function we'll use to describe this
// expression.
args.push(value);
args.push(value.into());
} else {
// Allow missing else, making the type nullable.
comment!(y, "Otherwise, yield null.");
return_type = return_type.make_nullable();

// Yield null for the else clause.
args.push(expressions::Expression::new_null(return_type.clone()));
args.push(expressions::Expression::new_null(return_type.clone()).into());
}

// Describe node.
Expand Down Expand Up @@ -200,13 +200,13 @@ pub fn parse_singular_or_list(
// Parse value to match.
let (n, e) = proto_boxed_required_field!(x, y, value, expressions::parse_expression);
let match_type = n.data_type();
args.push(e.unwrap_or_default());
args.push(e.unwrap_or_default().into());

// Handle allowed values.
proto_required_repeated_field!(x, y, options, |x, y| {
let expression = expressions::parse_expression(x, y)?;
let value_type = y.data_type();
args.push(expression);
args.push(expression.into());

// Check that the type is the same as the value.
types::assert_equal(
Expand Down Expand Up @@ -249,17 +249,19 @@ pub fn parse_multi_or_list(
// Parse value to match.
let (ns, es) = proto_required_repeated_field!(x, y, value, expressions::parse_expression);
let match_types = ns.iter().map(|x| x.data_type()).collect::<Vec<_>>();
args.push(expressions::Expression::Tuple(
es.into_iter().map(|x| x.unwrap_or_default()).collect(),
));
args.push(
expressions::Expression::Tuple(es.into_iter().map(|x| x.unwrap_or_default()).collect())
.into(),
);

// Handle allowed values.
proto_required_repeated_field!(x, y, options, |x, y| {
let (ns, es) = proto_required_repeated_field!(x, y, fields, expressions::parse_expression);
let value_types = ns.iter().map(|x| x.data_type()).collect::<Vec<_>>();
args.push(expressions::Expression::Tuple(
es.into_iter().map(|x| x.unwrap_or_default()).collect(),
));
args.push(
expressions::Expression::Tuple(es.into_iter().map(|x| x.unwrap_or_default()).collect())
.into(),
);

// Check that the type is the same as the value.
if match_types.len() != value_types.len() {
Expand Down
167 changes: 159 additions & 8 deletions rs/src/parse/expressions/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,119 @@ use crate::parse::expressions;
use crate::parse::extensions;
use crate::parse::sorts;
use crate::parse::types;
use crate::util;
use crate::util::string::Describe;
use std::sync::Arc;

/// A single argument passed to a function: a value expression, a data type,
/// or an enum option.
#[derive(Clone, Debug, PartialEq)]
pub enum FunctionArgument {
    /// A value argument, i.e. a normal expression.
    Value(expressions::Expression),

    /// A type argument.
    Type(Arc<data_type::DataType>),

    /// An enum option argument; `None` when no option is specified.
    Enum(Option<String>),
}

impl Default for FunctionArgument {
fn default() -> Self {
FunctionArgument::Value(expressions::Expression::default())
}
}

impl From<expressions::Expression> for FunctionArgument {
    /// Wraps an expression as a value argument.
    fn from(expression: expressions::Expression) -> Self {
        Self::Value(expression)
    }
}

impl Describe for FunctionArgument {
    /// Writes a description of the argument to `f`, passing `limit` on to
    /// whatever does the actual describing.
    ///
    /// Value and type arguments delegate to their own `describe()`, a
    /// specified enum option is written as an identifier, and an
    /// unspecified enum option is written as `-`.
    fn describe(
        &self,
        f: &mut std::fmt::Formatter<'_>,
        limit: util::string::Limit,
    ) -> std::fmt::Result {
        match self {
            Self::Enum(None) => f.write_str("-"),
            Self::Enum(Some(option)) => util::string::describe_identifier(f, option, limit),
            Self::Type(typ) => typ.describe(f, limit),
            Self::Value(expression) => expression.describe(f, limit),
        }
    }
}

impl std::fmt::Display for FunctionArgument {
    /// Formats the argument by forwarding to the value returned by
    /// `display()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let described = self.display();
        described.fmt(f)
    }
}

/// Parses the kind of an enum option argument.
///
/// Yields `Some(name)` (a copy of the option name) when the option is
/// specified and `None` when it is unspecified. The context is not used and
/// this never returns an error.
fn parse_enum_type(
    x: &substrait::function_argument::r#enum::EnumKind,
    _y: &mut context::Context,
) -> diagnostic::Result<Option<String>> {
    Ok(match x {
        substrait::function_argument::r#enum::EnumKind::Unspecified(_) => None,
        substrait::function_argument::r#enum::EnumKind::Specified(name) => Some(name.clone()),
    })
}

/// Parses an enum option argument.
///
/// The `enum_kind` field is parsed as a required field using
/// `parse_enum_type`. The result is flattened, so `None` is returned both
/// when the field parse yields nothing and when the option is unspecified.
fn parse_enum(
    x: &substrait::function_argument::Enum,
    y: &mut context::Context,
) -> diagnostic::Result<Option<String>> {
    // Second element of the macro's result: the optional parse result.
    let option = proto_required_field!(x, y, enum_kind, parse_enum_type).1;
    Ok(option.flatten())
}

/// Parses the argument kind of a 0.3.0+ function argument.
///
/// Value arguments are parsed as expressions, type arguments as types, and
/// enum arguments as enum options. Errors from the underlying parsers are
/// propagated.
fn parse_function_argument_type(
    x: &substrait::function_argument::ArgType,
    y: &mut context::Context,
) -> diagnostic::Result<FunctionArgument> {
    let argument = match x {
        substrait::function_argument::ArgType::Value(expression) => {
            FunctionArgument::Value(expressions::parse_expression(expression, y)?)
        }
        substrait::function_argument::ArgType::Type(typ) => {
            // Parsing the type stores it in the context; read it back from
            // there.
            types::parse_type(typ, y)?;
            FunctionArgument::Type(y.data_type())
        }
        substrait::function_argument::ArgType::Enum(option) => {
            FunctionArgument::Enum(parse_enum(option, y)?)
        }
    };
    Ok(argument)
}

/// Parses a 0.3.0+ function argument.
///
/// The `arg_type` field is parsed as a required field using
/// `parse_function_argument_type`. If that yields nothing, the default
/// `FunctionArgument` is returned instead.
fn parse_function_argument(
    x: &substrait::FunctionArgument,
    y: &mut context::Context,
) -> diagnostic::Result<FunctionArgument> {
    // Second element of the macro's result: the optional parse result.
    let argument = proto_required_field!(x, y, arg_type, parse_function_argument_type).1;
    Ok(argument.unwrap_or_default())
}

/// Parses a pre-0.3.0 function argument expression.
///
/// Delegates to `expressions::parse_legacy_function_argument` and converts
/// its expression-or-enum result into the matching `FunctionArgument`
/// variant. Errors are propagated unchanged.
fn parse_legacy_function_argument(
    x: &substrait::Expression,
    y: &mut context::Context,
) -> diagnostic::Result<FunctionArgument> {
    let argument = match expressions::parse_legacy_function_argument(x, y)? {
        expressions::ExpressionOrEnum::Enum(option) => FunctionArgument::Enum(option),
        expressions::ExpressionOrEnum::Value(expression) => FunctionArgument::Value(expression),
    };
    Ok(argument)
}

/// Matches a function call with its YAML definition, yielding its return type.
/// Yields an unresolved type if resolution fails.
pub fn check_function(
Expand Down Expand Up @@ -41,7 +152,8 @@ pub fn check_function(
fn parse_function(
y: &mut context::Context,
function: Option<Arc<extension::Reference<extension::Function>>>,
arguments: (Vec<Arc<tree::Node>>, Vec<Option<expressions::Expression>>),
arguments: (Vec<Arc<tree::Node>>, Vec<Option<FunctionArgument>>),
legacy_arguments: (Vec<Arc<tree::Node>>, Vec<Option<FunctionArgument>>),
return_type: Arc<data_type::DataType>,
) -> (Arc<data_type::DataType>, expressions::Expression) {
// Determine the name of the function.
Expand All @@ -50,6 +162,36 @@ fn parse_function(
.map(|x| x.name.to_string())
.unwrap_or_else(|| String::from("?"));

// Reconcile v0.3.0+ vs older function argument syntax.
let arguments = if legacy_arguments.1.is_empty() {
arguments
} else if arguments.1.is_empty() {
diagnostic!(
y,
Warning,
Deprecation,
"the args field for specifying function arguments was deprecated Substrait 0.3.0 (#161)"
);
legacy_arguments
} else {
if arguments != legacy_arguments {
diagnostic!(
y,
Error,
IllegalValue,
"mismatch between v0.3+ and legacy function argument specification"
);
comment!(
y,
"If both the v0.3+ and legacy syntax is used to specify function \
arguments, please make sure both map to the same arguments. If \
the argument pack is not representable using the legacy syntax, \
do not use it."
);
}
arguments
};

// Unpack the arguments into the function's enum options and regular
// arguments.
let mut opt_values = vec![];
Expand All @@ -61,7 +203,7 @@ fn parse_function(
.into_iter()
.zip(arguments.1.into_iter().map(|x| x.unwrap_or_default()))
{
if let expressions::Expression::EnumVariant(x) = &expr {
if let FunctionArgument::Enum(x) = &expr {
if opt_exprs.is_empty() && !arg_exprs.is_empty() {
diagnostic!(
y,
Expand Down Expand Up @@ -122,13 +264,16 @@ pub fn parse_scalar_function(
extensions::simple::parse_function_reference
)
.1;
let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument);
#[allow(deprecated)]
let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument);
let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument);
let return_type = proto_required_field!(x, y, output_type, types::parse_type)
.0
.data_type();

// Check function information.
let (return_type, expression) = parse_function(y, function, arguments, return_type);
let (return_type, expression) =
parse_function(y, function, arguments, legacy_arguments, return_type);

// Describe node.
y.set_data_type(return_type);
Expand Down Expand Up @@ -168,13 +313,16 @@ pub fn parse_window_function(
extensions::simple::parse_function_reference
)
.1;
let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument);
#[allow(deprecated)]
let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument);
let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument);
let return_type = proto_required_field!(x, y, output_type, types::parse_type)
.0
.data_type();

// Check function information.
let (return_type, expression) = parse_function(y, function, arguments, return_type);
let (return_type, expression) =
parse_function(y, function, arguments, legacy_arguments, return_type);

// Parse modifiers.
proto_repeated_field!(x, y, partitions, expressions::parse_expression);
Expand Down Expand Up @@ -216,13 +364,16 @@ pub fn parse_aggregate_function(
extensions::simple::parse_function_reference
)
.1;
let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument);
#[allow(deprecated)]
let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument);
let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument);
let return_type = proto_required_field!(x, y, output_type, types::parse_type)
.0
.data_type();

// Check function information.
let (return_type, expression) = parse_function(y, function, arguments, return_type);
let (return_type, expression) =
parse_function(y, function, arguments, legacy_arguments, return_type);

// Parse modifiers.
proto_repeated_field!(x, y, sorts, sorts::parse_sort_field);
Expand Down
Loading