Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.substrait.examples;

import io.substrait.examples.IsthmusAppExamples.Action;
import io.substrait.isthmus.ConverterProvider;
import io.substrait.isthmus.SqlToSubstrait;
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
import io.substrait.plan.Plan;
Expand All @@ -10,8 +11,8 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.prepare.CalciteCatalogReader;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.parser.SqlParseException;

/**
Expand All @@ -22,7 +23,7 @@
* <p>1. Create a fully typed schema for the inputs. Within a SQL context this represents the CREATE
* TABLE commands, which need to be converted to a Calcite Schema.
*
* <p>2. Parse the SQL query to convert (in the source SQL dialect).
* <p>2. Parse the SQL query to convert.
*
* <p>3. Convert the SQL query to Calcite Relations.
*
Expand All @@ -49,22 +50,26 @@ public void run(final String[] args) {
"test_result" varchar(15),"test_mileage" int, "postcode_area" varchar(15));
""");

// The unquoted identifier casing applied while parsing is configurable via a
// ConverterProvider. The same provider is used for both the schema and the query so that
// identifier casing stays consistent end-to-end. Casing.UNCHANGED preserves identifiers as
// written, matching the lower-case names used in the CREATE TABLE statements above.
final ConverterProvider converterProvider = new ConverterProvider(Casing.UNCHANGED);

final CalciteCatalogReader catalogReader =
SubstraitCreateStatementParser.processCreateStatementsToCatalog(createSqlStatements);
SubstraitCreateStatementParser.processCreateStatementsToCatalog(
converterProvider, createSqlStatements);

// Query that needs to be converted; again this could be in a variety of SQL
// dialects
// Query that needs to be converted
final String sqlQuery =
"""
SELECT vehicles.colour, count(*) as colourcount FROM vehicles INNER JOIN tests
ON vehicles.vehicle_id=tests.vehicle_id WHERE tests.test_result = 'P'
GROUP BY vehicles.colour ORDER BY count(*)
""";
final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait();

// choose DuckDB as an example dialect
final SqlDialect dialect = SqlDialect.DatabaseProduct.DUCKDB.getDialect();
final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader, dialect);
final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(converterProvider);
final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader);

// Create the proto plan to display to stdout - as it has a better format
final PlanProtoConverter planToProto = new PlanProtoConverter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.util.concurrent.Callable;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.prepare.Prepare;
import org.apache.calcite.sql.parser.SqlParser;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
Expand Down Expand Up @@ -60,15 +59,6 @@ enum OutputFormat {
description = "Calcite's casing policy for unquoted identifiers: ${COMPLETION-CANDIDATES}")
private Casing unquotedCasing = Casing.TO_UPPER;

private ConverterProvider converterProvider() {
return new ConverterProvider() {
@Override
public SqlParser.Config getSqlParserConfig() {
return super.getSqlParserConfig().withUnquotedCasing(unquotedCasing);
}
};
}

/**
* Standard Java Main method invoked by the isthmus CLI command.
*
Expand Down Expand Up @@ -96,16 +86,17 @@ public static void main(String... args) {

@Override
public Integer call() throws Exception {
ConverterProvider provider = new ConverterProvider(unquotedCasing);
// Isthmus image is parsing SQL Expression if that argument is defined
if (sqlExpressions != null) {
SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(converterProvider());
SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(provider);
ExtendedExpression extendedExpression = converter.convert(sqlExpressions, createStatements);
printMessage(extendedExpression);
} else { // by default Isthmus image are parsing SQL Query
SqlToSubstrait converter = new SqlToSubstrait(converterProvider());
SqlToSubstrait converter = new SqlToSubstrait(provider);
Prepare.CatalogReader catalog =
SubstraitCreateStatementParser.processCreateStatementsToCatalog(
createStatements.toArray(String[]::new));
provider, createStatements);
Plan plan = new PlanProtoConverter().toProto(converter.convert(sql, catalog));
printMessage(plan);
}
Expand Down
93 changes: 90 additions & 3 deletions isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,24 @@
*/
public class ConverterProvider {

/**
* A shared default {@link ConverterProvider} instance using all system defaults. Equivalent to
* {@code new ConverterProvider()} but avoids redundant construction at every call site.
*
* <p>This instance is safe to share because {@link ConverterProvider} is effectively immutable
* after construction — all fields are set only in constructors.
*/
public static final ConverterProvider DEFAULT = new ConverterProvider();

/** The Calcite type factory used for creating and managing data types. */
protected RelDataTypeFactory typeFactory;

/** The collection of Substrait extensions (functions and types) available for conversion. */
protected final SimpleExtension.ExtensionCollection extensions;

/** The casing applied to unquoted SQL identifiers during parsing. */
protected final Casing unquotedCasing;

/** Converter for Substrait scalar functions. */
protected ScalarFunctionConverter scalarFunctionConverter;

Expand All @@ -78,6 +90,21 @@ public ConverterProvider() {
this(DefaultExtensionCatalog.DEFAULT_COLLECTION, SubstraitTypeSystem.TYPE_FACTORY);
}

/**
* Creates a ConverterProvider with the specified unquoted identifier casing.
*
* <p>Uses {@link DefaultExtensionCatalog#DEFAULT_COLLECTION} and {@link
* SubstraitTypeSystem#TYPE_FACTORY}.
*
* @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
*/
public ConverterProvider(Casing unquotedCasing) {
this(
DefaultExtensionCatalog.DEFAULT_COLLECTION,
SubstraitTypeSystem.TYPE_FACTORY,
unquotedCasing);
}

/**
* Creates a ConverterProvider with the specified extension collection and default type factory.
*
Expand All @@ -95,13 +122,30 @@ public ConverterProvider(SimpleExtension.ExtensionCollection extensions) {
*/
public ConverterProvider(
SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) {
this(extensions, typeFactory, Casing.TO_UPPER);
}

/**
* Creates a ConverterProvider with the specified extension collection, type factory, and unquoted
* identifier casing.
*
* @param extensions the Substrait extension collection to use
* @param typeFactory the Calcite type factory to use
* @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
*/
public ConverterProvider(
SimpleExtension.ExtensionCollection extensions,
RelDataTypeFactory typeFactory,
Casing unquotedCasing) {
this(
typeFactory,
extensions,
new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory),
new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory),
new WindowFunctionConverter(extensions.windowFunctions(), typeFactory),
TypeConverter.DEFAULT);
TypeConverter.DEFAULT,
createDefaultExecutionBehavior(),
unquotedCasing);
}

/**
Expand All @@ -121,7 +165,15 @@ public ConverterProvider(
AggregateFunctionConverter afc,
WindowFunctionConverter wfc,
TypeConverter tc) {
this(typeFactory, extensions, sfc, afc, wfc, tc, createDefaultExecutionBehavior());
this(
typeFactory,
extensions,
sfc,
afc,
wfc,
tc,
createDefaultExecutionBehavior(),
Casing.TO_UPPER);
}

/**
Expand All @@ -143,13 +195,39 @@ public ConverterProvider(
WindowFunctionConverter wfc,
TypeConverter tc,
Plan.ExecutionBehavior executionBehavior) {
this(typeFactory, extensions, sfc, afc, wfc, tc, executionBehavior, Casing.TO_UPPER);
}

/**
* Creates a ConverterProvider with full customization including execution behavior and unquoted
* identifier casing.
*
* @param typeFactory the Calcite type factory to use
* @param extensions the Substrait extension collection to use
* @param sfc the scalar function converter to use
* @param afc the aggregate function converter to use
* @param wfc the window function converter to use
* @param tc the type converter to use
* @param executionBehavior the execution behavior to use for plans
* @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
*/
public ConverterProvider(
RelDataTypeFactory typeFactory,
SimpleExtension.ExtensionCollection extensions,
ScalarFunctionConverter sfc,
AggregateFunctionConverter afc,
WindowFunctionConverter wfc,
TypeConverter tc,
Plan.ExecutionBehavior executionBehavior,
Casing unquotedCasing) {
this.typeFactory = typeFactory;
this.extensions = extensions;
this.scalarFunctionConverter = sfc;
this.aggregateFunctionConverter = afc;
this.windowFunctionConverter = wfc;
this.typeConverter = tc;
this.executionBehavior = executionBehavior;
this.unquotedCasing = unquotedCasing;
}

/**
Expand All @@ -165,6 +243,15 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() {

// SQL to Calcite Processing

/**
* Returns the casing applied to unquoted SQL identifiers during parsing.
*
* @return the unquoted identifier casing
*/
public Casing getUnquotedCasing() {
return unquotedCasing;
}

/**
* {@link SqlParser.Config} is a Calcite class which controls SQL parsing behaviour like
* identifier casing.
Expand All @@ -173,7 +260,7 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() {
*/
public SqlParser.Config getSqlParserConfig() {
return SqlParser.Config.DEFAULT
.withUnquotedCasing(Casing.TO_UPPER)
.withUnquotedCasing(unquotedCasing)
.withParserFactory(SqlDdlParserImpl.FACTORY)
.withConformance(SqlConformanceEnum.LENIENT);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class SqlExpressionToSubstrait extends SqlConverterBase {

/** Creates a converter with default configuration. */
public SqlExpressionToSubstrait() {
this(new ConverterProvider());
this(ConverterProvider.DEFAULT);
}

/**
Expand Down Expand Up @@ -202,7 +202,7 @@ private Result registerCreateTablesForExtendedExpression(List<String> tables)
if (tables != null) {
for (String tableDef : tables) {
List<SubstraitTable> tList =
SubstraitCreateStatementParser.processCreateStatements(tableDef);
SubstraitCreateStatementParser.processCreateStatements(converterProvider, tableDef);
for (SubstraitTable t : tList) {
rootSchema.add(t.getName(), t);
for (RelDataTypeField field : t.getRowType(factory).getFieldList()) {
Expand Down
34 changes: 16 additions & 18 deletions isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
import io.substrait.plan.Plan;
import io.substrait.plan.Plan.Version;
import org.apache.calcite.prepare.Prepare;
import org.apache.calcite.server.ServerDdlExecutor;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlOperatorTable;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.sql.parser.SqlParser;

/**
* Take a SQL statement and a set of table definitions and return a substrait plan.
Expand All @@ -21,7 +19,7 @@ public class SqlToSubstrait extends SqlConverterBase {

/** Creates a SQL-to-Substrait converter using the default configuration. */
public SqlToSubstrait() {
this(new ConverterProvider());
this(ConverterProvider.DEFAULT);
}

/**
Expand Down Expand Up @@ -50,7 +48,9 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata
builder.executionBehavior(converterProvider.getExecutionBehavior());

// TODO: consider case in which one sql passes conversion while others don't
SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, operatorTable).stream()
SubstraitSqlToCalcite.convertQueries(
sqlStatements, catalogReader, converterProvider, operatorTable)
.stream()
.map(root -> SubstraitRelVisitor.convert(root, converterProvider))
.forEach(root -> builder.addRoots(root));

Expand All @@ -60,31 +60,29 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata
/**
* Converts one or more SQL statements into a Substrait {@link Plan}.
*
* <p>The {@code sqlDialect} parameter was previously used to influence identifier casing during
* parsing. This is now configurable directly via {@link
* ConverterProvider#ConverterProvider(org.apache.calcite.avatica.util.Casing)}, or for fully
* custom parser behaviour, by subclassing {@link ConverterProvider} and overriding {@link
* ConverterProvider#getSqlParserConfig()}.
*
* @param sqlStatements a string containing one more SQL statements
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
* the SQL statements
* @param sqlDialect The sql dialect to use for parsing.
* @return the Substrait {@link Plan}
* @throws SqlParseException if there is an error while parsing the SQL statements
* @deprecated Prefer constructing {@link SqlToSubstrait} with a {@link ConverterProvider}
* configured for the desired casing and calling {@link #convert(String,
* Prepare.CatalogReader)}. For fully custom parser behaviour, subclass {@link
* ConverterProvider} and override {@link ConverterProvider#getSqlParserConfig()}.
*/
@Deprecated
public Plan convert(
final String sqlStatements,
final Prepare.CatalogReader catalogReader,
final SqlDialect sqlDialect)
throws SqlParseException {
Builder builder = io.substrait.plan.Plan.builder();
builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build());
builder.executionBehavior(converterProvider.getExecutionBehavior());

final SqlParser.Config sqlParserConfig =
sqlDialect.configureParser(
SqlParser.config().withParserFactory(ServerDdlExecutor.PARSER_FACTORY));

// TODO: consider case in which one sql passes conversion while others don't
SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, sqlParserConfig).stream()
.map(root -> SubstraitRelVisitor.convert(root, converterProvider))
.forEach(root -> builder.addRoots(root));

return builder.build();
return convert(sqlStatements, catalogReader);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class SubstraitToSql extends SqlConverterBase {

/** Creates a SubstraitToSql converter with default configuration and extensions. */
public SubstraitToSql() {
this(new ConverterProvider());
this(ConverterProvider.DEFAULT);
}

/**
Expand Down
Loading
Loading