diff --git a/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java b/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java
index 17973b683..a9cf16fd9 100644
--- a/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java
+++ b/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java
@@ -1,6 +1,7 @@
package io.substrait.examples;
import io.substrait.examples.IsthmusAppExamples.Action;
+import io.substrait.isthmus.ConverterProvider;
import io.substrait.isthmus.SqlToSubstrait;
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
import io.substrait.plan.Plan;
@@ -10,8 +11,8 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
+import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.prepare.CalciteCatalogReader;
-import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.parser.SqlParseException;
/**
@@ -22,7 +23,7 @@
*
1. Create a fully typed schema for the inputs. Within a SQL context this represents the CREATE
* TABLE commands, which need to be converted to a Calcite Schema.
*
- *
2. Parse the SQL query to convert (in the source SQL dialect).
+ *
2. Parse the SQL query to convert.
*
*
3. Convert the SQL query to Calcite Relations.
*
@@ -49,22 +50,26 @@ public void run(final String[] args) {
"test_result" varchar(15),"test_mileage" int, "postcode_area" varchar(15));
""");
+ // The unquoted identifier casing applied while parsing is configurable via a
+ // ConverterProvider. The same provider is used for both the schema and the query so that
+ // identifier casing stays consistent end-to-end. Casing.UNCHANGED preserves identifiers as
+ // written, matching the lower-case names used in the CREATE TABLE statements above.
+ final ConverterProvider converterProvider = new ConverterProvider(Casing.UNCHANGED);
+
final CalciteCatalogReader catalogReader =
- SubstraitCreateStatementParser.processCreateStatementsToCatalog(createSqlStatements);
+ SubstraitCreateStatementParser.processCreateStatementsToCatalog(
+ converterProvider, createSqlStatements);
- // Query that needs to be converted; again this could be in a variety of SQL
- // dialects
+ // Query that needs to be converted
final String sqlQuery =
"""
SELECT vehicles.colour, count(*) as colourcount FROM vehicles INNER JOIN tests
ON vehicles.vehicle_id=tests.vehicle_id WHERE tests.test_result = 'P'
GROUP BY vehicles.colour ORDER BY count(*)
""";
- final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait();
- // choose DuckDB as an example dialect
- final SqlDialect dialect = SqlDialect.DatabaseProduct.DUCKDB.getDialect();
- final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader, dialect);
+ final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(converterProvider);
+ final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader);
// Create the proto plan to display to stdout - as it has a better format
final PlanProtoConverter planToProto = new PlanProtoConverter();
diff --git a/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java b/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java
index 03d4fef2b..39566ebf3 100644
--- a/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java
+++ b/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java
@@ -15,7 +15,6 @@
import java.util.concurrent.Callable;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.prepare.Prepare;
-import org.apache.calcite.sql.parser.SqlParser;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
@@ -60,15 +59,6 @@ enum OutputFormat {
description = "Calcite's casing policy for unquoted identifiers: ${COMPLETION-CANDIDATES}")
private Casing unquotedCasing = Casing.TO_UPPER;
- private ConverterProvider converterProvider() {
- return new ConverterProvider() {
- @Override
- public SqlParser.Config getSqlParserConfig() {
- return super.getSqlParserConfig().withUnquotedCasing(unquotedCasing);
- }
- };
- }
-
/**
* Standard Java Main method invoked by the isthmus CLI command.
*
@@ -96,16 +86,17 @@ public static void main(String... args) {
@Override
public Integer call() throws Exception {
+ ConverterProvider provider = new ConverterProvider(unquotedCasing);
// Isthmus image is parsing SQL Expression if that argument is defined
if (sqlExpressions != null) {
- SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(converterProvider());
+ SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(provider);
ExtendedExpression extendedExpression = converter.convert(sqlExpressions, createStatements);
printMessage(extendedExpression);
} else { // by default Isthmus image are parsing SQL Query
- SqlToSubstrait converter = new SqlToSubstrait(converterProvider());
+ SqlToSubstrait converter = new SqlToSubstrait(provider);
Prepare.CatalogReader catalog =
SubstraitCreateStatementParser.processCreateStatementsToCatalog(
- createStatements.toArray(String[]::new));
+ provider, createStatements);
Plan plan = new PlanProtoConverter().toProto(converter.convert(sql, catalog));
printMessage(plan);
}
diff --git a/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java b/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java
index 72e77586d..a9836b7bf 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java
@@ -49,12 +49,24 @@
*/
public class ConverterProvider {
+ /**
+ * A shared default {@link ConverterProvider} instance using all system defaults. Equivalent to
+ * {@code new ConverterProvider()} but avoids redundant construction at every call site.
+ *
+ *
This instance is safe to share because {@link ConverterProvider} is effectively immutable
+ * after construction — all fields are set only in constructors.
+ */
+ public static final ConverterProvider DEFAULT = new ConverterProvider();
+
/** The Calcite type factory used for creating and managing data types. */
protected RelDataTypeFactory typeFactory;
/** The collection of Substrait extensions (functions and types) available for conversion. */
protected final SimpleExtension.ExtensionCollection extensions;
+ /** The casing applied to unquoted SQL identifiers during parsing. */
+ protected final Casing unquotedCasing;
+
/** Converter for Substrait scalar functions. */
protected ScalarFunctionConverter scalarFunctionConverter;
@@ -78,6 +90,21 @@ public ConverterProvider() {
this(DefaultExtensionCatalog.DEFAULT_COLLECTION, SubstraitTypeSystem.TYPE_FACTORY);
}
+ /**
+ * Creates a ConverterProvider with the specified unquoted identifier casing.
+ *
+ *
Uses {@link DefaultExtensionCatalog#DEFAULT_COLLECTION} and {@link
+ * SubstraitTypeSystem#TYPE_FACTORY}.
+ *
+ * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
+ */
+ public ConverterProvider(Casing unquotedCasing) {
+ this(
+ DefaultExtensionCatalog.DEFAULT_COLLECTION,
+ SubstraitTypeSystem.TYPE_FACTORY,
+ unquotedCasing);
+ }
+
/**
* Creates a ConverterProvider with the specified extension collection and default type factory.
*
@@ -95,13 +122,30 @@ public ConverterProvider(SimpleExtension.ExtensionCollection extensions) {
*/
public ConverterProvider(
SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) {
+ this(extensions, typeFactory, Casing.TO_UPPER);
+ }
+
+ /**
+ * Creates a ConverterProvider with the specified extension collection, type factory, and unquoted
+ * identifier casing.
+ *
+ * @param extensions the Substrait extension collection to use
+ * @param typeFactory the Calcite type factory to use
+ * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
+ */
+ public ConverterProvider(
+ SimpleExtension.ExtensionCollection extensions,
+ RelDataTypeFactory typeFactory,
+ Casing unquotedCasing) {
this(
typeFactory,
extensions,
new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory),
new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory),
new WindowFunctionConverter(extensions.windowFunctions(), typeFactory),
- TypeConverter.DEFAULT);
+ TypeConverter.DEFAULT,
+ createDefaultExecutionBehavior(),
+ unquotedCasing);
}
/**
@@ -121,7 +165,15 @@ public ConverterProvider(
AggregateFunctionConverter afc,
WindowFunctionConverter wfc,
TypeConverter tc) {
- this(typeFactory, extensions, sfc, afc, wfc, tc, createDefaultExecutionBehavior());
+ this(
+ typeFactory,
+ extensions,
+ sfc,
+ afc,
+ wfc,
+ tc,
+ createDefaultExecutionBehavior(),
+ Casing.TO_UPPER);
}
/**
@@ -143,6 +195,31 @@ public ConverterProvider(
WindowFunctionConverter wfc,
TypeConverter tc,
Plan.ExecutionBehavior executionBehavior) {
+ this(typeFactory, extensions, sfc, afc, wfc, tc, executionBehavior, Casing.TO_UPPER);
+ }
+
+ /**
+ * Creates a ConverterProvider with full customization including execution behavior and unquoted
+ * identifier casing.
+ *
+ * @param typeFactory the Calcite type factory to use
+ * @param extensions the Substrait extension collection to use
+ * @param sfc the scalar function converter to use
+ * @param afc the aggregate function converter to use
+ * @param wfc the window function converter to use
+ * @param tc the type converter to use
+ * @param executionBehavior the execution behavior to use for plans
+ * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing
+ */
+ public ConverterProvider(
+ RelDataTypeFactory typeFactory,
+ SimpleExtension.ExtensionCollection extensions,
+ ScalarFunctionConverter sfc,
+ AggregateFunctionConverter afc,
+ WindowFunctionConverter wfc,
+ TypeConverter tc,
+ Plan.ExecutionBehavior executionBehavior,
+ Casing unquotedCasing) {
this.typeFactory = typeFactory;
this.extensions = extensions;
this.scalarFunctionConverter = sfc;
@@ -150,6 +227,7 @@ public ConverterProvider(
this.windowFunctionConverter = wfc;
this.typeConverter = tc;
this.executionBehavior = executionBehavior;
+ this.unquotedCasing = unquotedCasing;
}
/**
@@ -165,6 +243,15 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() {
// SQL to Calcite Processing
+ /**
+ * Returns the casing applied to unquoted SQL identifiers during parsing.
+ *
+ * @return the unquoted identifier casing
+ */
+ public Casing getUnquotedCasing() {
+ return unquotedCasing;
+ }
+
/**
* {@link SqlParser.Config} is a Calcite class which controls SQL parsing behaviour like
* identifier casing.
@@ -173,7 +260,7 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() {
*/
public SqlParser.Config getSqlParserConfig() {
return SqlParser.Config.DEFAULT
- .withUnquotedCasing(Casing.TO_UPPER)
+ .withUnquotedCasing(unquotedCasing)
.withParserFactory(SqlDdlParserImpl.FACTORY)
.withConformance(SqlConformanceEnum.LENIENT);
}
diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java
index 5c020d5d1..9637cd6fd 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java
@@ -40,7 +40,7 @@ public class SqlExpressionToSubstrait extends SqlConverterBase {
/** Creates a converter with default configuration. */
public SqlExpressionToSubstrait() {
- this(new ConverterProvider());
+ this(ConverterProvider.DEFAULT);
}
/**
@@ -202,7 +202,7 @@ private Result registerCreateTablesForExtendedExpression(List tables)
if (tables != null) {
for (String tableDef : tables) {
List tList =
- SubstraitCreateStatementParser.processCreateStatements(tableDef);
+ SubstraitCreateStatementParser.processCreateStatements(converterProvider, tableDef);
for (SubstraitTable t : tList) {
rootSchema.add(t.getName(), t);
for (RelDataTypeField field : t.getRowType(factory).getFieldList()) {
diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java
index 765dd8dfd..36a142931 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java
@@ -5,11 +5,9 @@
import io.substrait.plan.Plan;
import io.substrait.plan.Plan.Version;
import org.apache.calcite.prepare.Prepare;
-import org.apache.calcite.server.ServerDdlExecutor;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlOperatorTable;
import org.apache.calcite.sql.parser.SqlParseException;
-import org.apache.calcite.sql.parser.SqlParser;
/**
* Take a SQL statement and a set of table definitions and return a substrait plan.
@@ -21,7 +19,7 @@ public class SqlToSubstrait extends SqlConverterBase {
/** Creates a SQL-to-Substrait converter using the default configuration. */
public SqlToSubstrait() {
- this(new ConverterProvider());
+ this(ConverterProvider.DEFAULT);
}
/**
@@ -50,7 +48,9 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata
builder.executionBehavior(converterProvider.getExecutionBehavior());
// TODO: consider case in which one sql passes conversion while others don't
- SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, operatorTable).stream()
+ SubstraitSqlToCalcite.convertQueries(
+ sqlStatements, catalogReader, converterProvider, operatorTable)
+ .stream()
.map(root -> SubstraitRelVisitor.convert(root, converterProvider))
.forEach(root -> builder.addRoots(root));
@@ -60,31 +60,29 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata
/**
* Converts one or more SQL statements into a Substrait {@link Plan}.
*
+ * The {@code sqlDialect} parameter was previously used to influence identifier casing during
+ * parsing. This is now configurable directly via {@link
+ * ConverterProvider#ConverterProvider(org.apache.calcite.avatica.util.Casing)}, or for fully
+ * custom parser behaviour, by subclassing {@link ConverterProvider} and overriding {@link
+ * ConverterProvider#getSqlParserConfig()}.
+ *
* @param sqlStatements a string containing one more SQL statements
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
* the SQL statements
* @param sqlDialect The sql dialect to use for parsing.
* @return the Substrait {@link Plan}
* @throws SqlParseException if there is an error while parsing the SQL statements
+ * @deprecated Prefer constructing {@link SqlToSubstrait} with a {@link ConverterProvider}
+ * configured for the desired casing and calling {@link #convert(String,
+ * Prepare.CatalogReader)}. For fully custom parser behaviour, subclass {@link
+ * ConverterProvider} and override {@link ConverterProvider#getSqlParserConfig()}.
*/
+ @Deprecated
public Plan convert(
final String sqlStatements,
final Prepare.CatalogReader catalogReader,
final SqlDialect sqlDialect)
throws SqlParseException {
- Builder builder = io.substrait.plan.Plan.builder();
- builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build());
- builder.executionBehavior(converterProvider.getExecutionBehavior());
-
- final SqlParser.Config sqlParserConfig =
- sqlDialect.configureParser(
- SqlParser.config().withParserFactory(ServerDdlExecutor.PARSER_FACTORY));
-
- // TODO: consider case in which one sql passes conversion while others don't
- SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, sqlParserConfig).stream()
- .map(root -> SubstraitRelVisitor.convert(root, converterProvider))
- .forEach(root -> builder.addRoots(root));
-
- return builder.build();
+ return convert(sqlStatements, catalogReader);
}
}
diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java
index 690bd5774..ac07d8d65 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java
@@ -23,7 +23,7 @@ public class SubstraitToSql extends SqlConverterBase {
/** Creates a SubstraitToSql converter with default configuration and extensions. */
public SubstraitToSql() {
- this(new ConverterProvider());
+ this(ConverterProvider.DEFAULT);
}
/**
diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java
index b008d577d..f8179e397 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java
@@ -1,5 +1,6 @@
package io.substrait.isthmus.sql;
+import io.substrait.isthmus.ConverterProvider;
import io.substrait.isthmus.SqlConverterBase;
import io.substrait.isthmus.SubstraitTypeSystem;
import io.substrait.isthmus.Utils;
@@ -50,28 +51,7 @@ public class SubstraitCreateStatementParser {
*/
public static List processCreateStatements(@NonNull final String createStatements)
throws SqlParseException {
- final List tableList = new ArrayList<>();
-
- final List sqlNode = SubstraitSqlStatementParser.parseStatements(createStatements);
- for (final SqlNode parsed : sqlNode) {
- if (!(parsed instanceof SqlCreateTable)) {
- throw fail("Not a valid CREATE TABLE statement.");
- }
-
- final SqlCreateTable create = (SqlCreateTable) parsed;
-
- if (create.name.names.size() > 1) {
- throw fail("Only simple table names are allowed.", create.name.getParserPosition());
- }
-
- if (create.query != null) {
- throw fail("CTAS not supported.", create.name.getParserPosition());
- }
-
- tableList.add(createSubstraitTable(create.name.names.get(0), create.columnList));
- }
-
- return tableList;
+ return processCreateStatements(ConverterProvider.DEFAULT, createStatements);
}
/**
@@ -89,6 +69,26 @@ public static CalciteCatalogReader processCreateStatementsToCatalog(
return processCreateStatementsToCatalog(createStatements.toArray(new String[0]));
}
+ /**
+ * Parses one or more SQL strings containing only CREATE statements into a {@link
+ * CalciteCatalogReader}, using the parser settings from the given {@link ConverterProvider}.
+ *
+ * This method expects the use of fully qualified table names in the CREATE statements.
+ *
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
+ * @param createStatements List of SQL strings containing only CREATE statements, must not be null
+ * @return a {@link CalciteCatalogReader} generated from the CREATE statements
+ * @throws SqlParseException if there is an error parsing the SQL statements
+ */
+ public static CalciteCatalogReader processCreateStatementsToCatalog(
+ @NonNull final ConverterProvider converterProvider,
+ @NonNull final List createStatements)
+ throws SqlParseException {
+ return processCreateStatementsToCatalog(
+ converterProvider, createStatements.toArray(new String[0]));
+ }
+
/**
* Parses one or more SQL strings containing only CREATE statements into a {@link
* CalciteCatalogReader}
@@ -101,7 +101,33 @@ public static CalciteCatalogReader processCreateStatementsToCatalog(
*/
public static CalciteCatalogReader processCreateStatementsToCatalog(
@NonNull final String... createStatements) throws SqlParseException {
- final CalciteSchema rootSchema = processCreateStatementsToSchema(createStatements);
+ final CalciteSchema rootSchema =
+ processCreateStatementsToSchema(ConverterProvider.DEFAULT, createStatements);
+ final List defaultSchema = Collections.emptyList();
+ return new CalciteCatalogReader(
+ rootSchema,
+ defaultSchema,
+ SubstraitTypeSystem.TYPE_FACTORY,
+ SqlConverterBase.CONNECTION_CONFIG);
+ }
+
+ /**
+ * Parses one or more SQL strings containing only CREATE statements into a {@link
+ * CalciteCatalogReader}, using the parser settings from the given {@link ConverterProvider}.
+ *
+ * This method expects the use of fully qualified table names in the CREATE statements.
+ *
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
+ * @param createStatements a SQL string containing only CREATE statements, must not be null
+ * @return a {@link CalciteCatalogReader} generated from the CREATE statements
+ * @throws SqlParseException if parsing fails or statements are invalid
+ */
+ public static CalciteCatalogReader processCreateStatementsToCatalog(
+ @NonNull final ConverterProvider converterProvider, @NonNull final String... createStatements)
+ throws SqlParseException {
+ final CalciteSchema rootSchema =
+ processCreateStatementsToSchema(converterProvider, createStatements);
final List defaultSchema = Collections.emptyList();
return new CalciteCatalogReader(
rootSchema,
@@ -133,19 +159,58 @@ private static SqlParseException fail(@Nullable final String message) {
}
/**
- * Parses one or more SQL strings containing only CREATE statements into a {@link CalciteSchema}.
+ * Parses a SQL string containing only CREATE statements into a list of {@link SubstraitTable}s,
+ * using the parser settings from the given {@link ConverterProvider}.
+ *
+ * This method only supports simple table names without any additional qualifiers. Only used
+ * with {@link io.substrait.isthmus.SqlExpressionToSubstrait}.
*
- * @param createStatements one or more SQL strings containing only CREATE statements; must not be
- * null
- * @return a {@link CalciteSchema} generated from the CREATE statements
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
+ * @param createStatements a SQL string containing only CREATE statements; must not be null
+ * @return list of {@link SubstraitTable}s generated from the CREATE statements
* @throws SqlParseException if parsing fails or statements are invalid
*/
+ public static List processCreateStatements(
+ @NonNull final ConverterProvider converterProvider, @NonNull final String createStatements)
+ throws SqlParseException {
+ final List tableList = new ArrayList<>();
+
+ final List sqlNode =
+ SubstraitSqlStatementParser.parseStatements(createStatements, converterProvider);
+ for (final SqlNode parsed : sqlNode) {
+ if (!(parsed instanceof SqlCreateTable)) {
+ throw fail("Not a valid CREATE TABLE statement.");
+ }
+
+ final SqlCreateTable create = (SqlCreateTable) parsed;
+
+ if (create.name.names.size() > 1) {
+ throw fail("Only simple table names are allowed.", create.name.getParserPosition());
+ }
+
+ if (create.query != null) {
+ throw fail("CTAS not supported.", create.name.getParserPosition());
+ }
+
+ tableList.add(createSubstraitTable(create.name.names.get(0), create.columnList));
+ }
+
+ return tableList;
+ }
+
+ /**
+ * Parses one or more SQL strings containing only CREATE statements into a {@link CalciteSchema}
+ * using the given provider's parser config.
+ */
private static CalciteSchema processCreateStatementsToSchema(
- @NonNull final String... createStatements) throws SqlParseException {
+ @NonNull final ConverterProvider converterProvider, @NonNull final String... createStatements)
+ throws SqlParseException {
final CalciteSchema rootSchema = CalciteSchema.createRootSchema(false);
for (final String statement : createStatements) {
- final List sqlNode = SubstraitSqlStatementParser.parseStatements(statement);
+ final List sqlNode =
+ SubstraitSqlStatementParser.parseStatements(statement, converterProvider);
for (final SqlNode parsed : sqlNode) {
if (!(parsed instanceof SqlCreateTable)) {
throw fail("Not a valid CREATE TABLE statement.");
diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java
index 8da220c84..c6ddb719c 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java
@@ -1,12 +1,10 @@
package io.substrait.isthmus.sql;
+import io.substrait.isthmus.ConverterProvider;
import java.util.List;
-import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.sql.parser.SqlParser;
-import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl;
-import org.apache.calcite.sql.validate.SqlConformanceEnum;
/**
* Utility class for parsing SQL statements to {@link SqlNode}s using a Substrait flavoured SQL
@@ -14,14 +12,6 @@
*/
public class SubstraitSqlStatementParser {
- private static final SqlParser.Config PARSER_CONFIG =
- SqlParser.config()
- // TODO: switch to Casing.UNCHANGED
- .withUnquotedCasing(Casing.TO_UPPER)
- // use LENIENT conformance to allow for parsing a wide variety of dialects
- .withConformance(SqlConformanceEnum.LENIENT)
- .withParserFactory(SqlDdlParserImpl.FACTORY);
-
/**
* Parse one or more SQL statements to a list of {@link SqlNode}s.
*
@@ -30,20 +20,25 @@ public class SubstraitSqlStatementParser {
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
public static List parseStatements(String sqlStatements) throws SqlParseException {
- return parseStatements(sqlStatements, PARSER_CONFIG);
+ return parseStatements(sqlStatements, ConverterProvider.DEFAULT);
}
/**
- * Parse one or more SQL statements to a list of {@link SqlNode}s.
+ * Parse one or more SQL statements to a list of {@link SqlNode}s, using the parser settings from
+ * the given {@link ConverterProvider}.
+ *
+ * To use a fully custom parser configuration, subclass {@link ConverterProvider} and override
+ * {@link ConverterProvider#getSqlParserConfig()}.
*
* @param sqlStatements a string containing one or more SQL statements
- * @param parserConfig Calcite SqlParser.Config to control the parser
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
* @return a list of {@link SqlNode}s corresponding to the given statements
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
- public static List parseStatements(String sqlStatements, SqlParser.Config parserConfig)
- throws SqlParseException {
- SqlParser parser = SqlParser.create(sqlStatements, parserConfig);
+ public static List parseStatements(
+ String sqlStatements, ConverterProvider converterProvider) throws SqlParseException {
+ SqlParser parser = SqlParser.create(sqlStatements, converterProvider.getSqlParserConfig());
return parser.parseStmtList();
}
}
diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java
index d2ec84111..7855f5263 100644
--- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java
+++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java
@@ -1,5 +1,6 @@
package io.substrait.isthmus.sql;
+import io.substrait.isthmus.ConverterProvider;
import io.substrait.isthmus.SubstraitTypeSystem;
import io.substrait.isthmus.calcite.rel.DdlSqlToRelConverter;
import java.util.List;
@@ -17,7 +18,6 @@
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperatorTable;
import org.apache.calcite.sql.parser.SqlParseException;
-import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.calcite.sql2rel.StandardConvertletTable;
@@ -81,7 +81,18 @@ public static RelRoot convertQuery(
SqlValidator validator,
RelOptCluster cluster)
throws SqlParseException {
- List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatement);
+ return convertQuery(sqlStatement, catalogReader, validator, cluster, ConverterProvider.DEFAULT);
+ }
+
+ static RelRoot convertQuery(
+ String sqlStatement,
+ Prepare.CatalogReader catalogReader,
+ SqlValidator validator,
+ RelOptCluster cluster,
+ ConverterProvider converterProvider)
+ throws SqlParseException {
+ List sqlNodes =
+ SubstraitSqlStatementParser.parseStatements(sqlStatement, converterProvider);
if (sqlNodes.size() != 1) {
throw new IllegalArgumentException(
String.format("Expected one statement, found: %d", sqlNodes.size()));
@@ -111,39 +122,48 @@ public static List convertQueries(
/**
* Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per
- * statement.
+ * statement, using the parser configuration from the given {@link ConverterProvider}.
*
* @param sqlStatements a string containing one or more SQL statements
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
* the SQL statements
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
+ * @param operatorTable the {@link SqlOperatorTable} for controlling valid operators
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
public static List convertQueries(
- String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException {
- SqlValidator validator = new SubstraitSqlValidator(catalogReader);
- return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster());
+ String sqlStatements,
+ Prepare.CatalogReader catalogReader,
+ ConverterProvider converterProvider,
+ SqlOperatorTable operatorTable)
+ throws SqlParseException {
+ SqlValidator validator = new SubstraitSqlValidator(catalogReader, operatorTable);
+ return convertQueries(
+ sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), converterProvider);
}
/**
* Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per
- * statement.
+ * statement, using the parser configuration from the given {@link ConverterProvider}.
*
* @param sqlStatements a string containing one or more SQL statements
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
* the SQL statements
- * @param parserConfig Calcite Parser config to use with the given SQL Statements
+ * @param converterProvider the converter provider whose parser config controls identifier casing
+ * and other parser settings
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
public static List convertQueries(
String sqlStatements,
Prepare.CatalogReader catalogReader,
- final SqlParser.Config parserConfig)
+ ConverterProvider converterProvider)
throws SqlParseException {
SqlValidator validator = new SubstraitSqlValidator(catalogReader);
return convertQueries(
- sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), parserConfig);
+ sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), converterProvider);
}
/**
@@ -153,23 +173,13 @@ public static List convertQueries(
* @param sqlStatements a string containing one or more SQL statements
* @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in
* the SQL statements
- * @param validator the {@link SqlValidator} used to validate SQL statements. Allows for
- * additional control of SQL functions and operators via {@link
- * SqlValidator#getOperatorTable()}
- * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement
- * processing. Calcite expects that the {@link RelOptCluster} used during statement processing
- * is the same as that used during query optimization.
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
public static List convertQueries(
- String sqlStatements,
- Prepare.CatalogReader catalogReader,
- SqlValidator validator,
- RelOptCluster cluster)
- throws SqlParseException {
- List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatements);
- return convert(sqlNodes, catalogReader, validator, cluster);
+ String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException {
+ SqlValidator validator = new SubstraitSqlValidator(catalogReader);
+ return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster());
}
/**
@@ -185,19 +195,28 @@ public static List convertQueries(
* @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement
* processing. Calcite expects that the {@link RelOptCluster} used during statement processing
* is the same as that used during query optimization.
- * @param parserConfig Calcite Parser config to use with the given SQL Statements
* @return a list of {@link RelRoot}s corresponding to the given SQL statements
* @throws SqlParseException if there is an error while parsing the SQL statements
*/
public static List convertQueries(
+ String sqlStatements,
+ Prepare.CatalogReader catalogReader,
+ SqlValidator validator,
+ RelOptCluster cluster)
+ throws SqlParseException {
+ return convertQueries(
+ sqlStatements, catalogReader, validator, cluster, ConverterProvider.DEFAULT);
+ }
+
+ static List convertQueries(
String sqlStatements,
Prepare.CatalogReader catalogReader,
SqlValidator validator,
RelOptCluster cluster,
- SqlParser.Config parserConfig)
+ ConverterProvider converterProvider)
throws SqlParseException {
List sqlNodes =
- SubstraitSqlStatementParser.parseStatements(sqlStatements, parserConfig);
+ SubstraitSqlStatementParser.parseStatements(sqlStatements, converterProvider);
return convert(sqlNodes, catalogReader, validator, cluster);
}
diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java
index fbd6a4163..760a2fc03 100644
--- a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java
+++ b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java
@@ -2,7 +2,6 @@
import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
import org.apache.calcite.prepare.Prepare;
-import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.parser.SqlParseException;
import org.junit.jupiter.api.Test;
@@ -14,9 +13,6 @@ void testConversion() throws SqlParseException {
"create table src1 (intcol int, charcol varchar(10))");
final SqlToSubstrait converter = new SqlToSubstrait();
- converter.convert(
- "create table dst1 as select * from src1",
- catalogReader,
- SqlDialect.DatabaseProduct.CALCITE.getDialect());
+ converter.convert("create table dst1 as select * from src1", catalogReader);
}
}
diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java
index 68763615c..ef68860d6 100644
--- a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java
+++ b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java
@@ -19,10 +19,7 @@
import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.rules.CoreRules;
-import org.apache.calcite.server.ServerDdlExecutor;
-import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.parser.SqlParseException;
-import org.apache.calcite.sql.parser.SqlParser;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
@@ -36,12 +33,6 @@
*/
class DdlToSubstraitConversionWithOptimizationTest {
- /** Parser configuration for DDL statements. */
- private static final SqlParser.Config DDL_PARSER_CONFIG =
- SqlDialect.DatabaseProduct.CALCITE
- .getDialect()
- .configureParser(SqlParser.config().withParserFactory(ServerDdlExecutor.PARSER_FACTORY));
-
private final ConverterProvider converterProvider = new ConverterProvider();
/**
@@ -68,7 +59,7 @@ void testDdlOptimizationMutation(String ddlType) throws SqlParseException {
// Convert DDL to RelRoot
List relRoots =
- SubstraitSqlToCalcite.convertQueries(createStatement, catalogReader, DDL_PARSER_CONFIG);
+ SubstraitSqlToCalcite.convertQueries(createStatement, catalogReader, converterProvider);
RelRoot relRoot = relRoots.get(0);
// Apply an optimization rule (FILTER_INTO_JOIN) that will structurally mutate the children
diff --git a/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java b/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java
new file mode 100644
index 000000000..75affb69d
--- /dev/null
+++ b/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java
@@ -0,0 +1,76 @@
+package io.substrait.isthmus;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import io.substrait.isthmus.sql.SubstraitCreateStatementParser;
+import io.substrait.plan.Plan;
+import io.substrait.relation.NamedScan;
+import io.substrait.relation.Project;
+import org.apache.calcite.avatica.util.Casing;
+import org.apache.calcite.prepare.Prepare;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Verifies that the {@link ConverterProvider} unquoted casing parameter controls identifier casing
+ * consistently across both CREATE TABLE parsing and query parsing, so that the table name stored in
+ * a Substrait {@link NamedScan} reflects the configured casing.
+ */
+class UnquotedCasingTest {
+
+ private static final String CREATE_STATEMENT = "CREATE TABLE employees (id BIGINT, name VARCHAR)";
+
+ @Test
+ void defaultCasingIsToUpper() {
+ ConverterProvider provider = new ConverterProvider();
+ assertEquals(Casing.TO_UPPER, provider.getSqlParserConfig().unquotedCasing());
+ assertEquals(Casing.TO_UPPER, provider.getUnquotedCasing());
+ }
+
+ @Test
+ void constructorCasingUnchanged() {
+ ConverterProvider provider = new ConverterProvider(Casing.UNCHANGED);
+ assertEquals(Casing.UNCHANGED, provider.getSqlParserConfig().unquotedCasing());
+ assertEquals(Casing.UNCHANGED, provider.getUnquotedCasing());
+ }
+
+ @Test
+ void constructorCasingToLower() {
+ ConverterProvider provider = new ConverterProvider(Casing.TO_LOWER);
+ assertEquals(Casing.TO_LOWER, provider.getSqlParserConfig().unquotedCasing());
+ assertEquals(Casing.TO_LOWER, provider.getUnquotedCasing());
+ }
+
+ /**
+ * With the default {@link Casing#TO_UPPER}, both CREATE TABLE and SELECT fold unquoted
+ * identifiers to upper-case. The resulting {@link NamedScan} table name is {@code EMPLOYEES}.
+ */
+ @Test
+ void defaultCasingFoldsTableNameToUpper() throws Exception {
+ ConverterProvider provider = new ConverterProvider();
+ Prepare.CatalogReader catalog =
+ SubstraitCreateStatementParser.processCreateStatementsToCatalog(provider, CREATE_STATEMENT);
+
+ Plan plan = new SqlToSubstrait(provider).convert("SELECT id FROM employees", catalog);
+
+ NamedScan scan = (NamedScan) ((Project) plan.getRoots().get(0).getInput()).getInput();
+ assertEquals("EMPLOYEES", scan.getNames().get(0));
+ }
+
+ /**
+ * With {@link Casing#UNCHANGED}, both CREATE TABLE and SELECT preserve the written casing of
+ * unquoted identifiers. A query using lowercase {@code employees} against a catalog built from
+ * {@code CREATE TABLE employees} therefore produces a {@link NamedScan} with name {@code
+ * employees}.
+ */
+ @Test
+ void unchangedCasingPreservesTableName() throws Exception {
+ ConverterProvider provider = new ConverterProvider(Casing.UNCHANGED);
+ Prepare.CatalogReader catalog =
+ SubstraitCreateStatementParser.processCreateStatementsToCatalog(provider, CREATE_STATEMENT);
+
+ Plan plan = new SqlToSubstrait(provider).convert("SELECT id FROM employees", catalog);
+
+ NamedScan scan = (NamedScan) ((Project) plan.getRoots().get(0).getInput()).getInput();
+ assertEquals("employees", scan.getNames().get(0));
+ }
+}