From 283d871e546e28da369fec9dcb088cb7b9976da2 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Thu, 2 Jul 2026 21:15:38 +0200 Subject: [PATCH] feat(isthmus)!: make unquoted identifier casing configurable in ConverterProvider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add constructor-based configuration of unquoted SQL identifier casing to ConverterProvider, so that isthmus consumers can control how unquoted identifiers are cased during parsing. The default remains Casing.TO_UPPER (no behaviour change). Previously the only way to change this was to subclass ConverterProvider and override getSqlParserConfig() — as IsthmusEntryPoint already did with an anonymous class. That workaround is now replaced by a first-class constructor parameter. Changes to ConverterProvider: - unquotedCasing is a new final field, consistent with executionBehavior - getUnquotedCasing() getter - getSqlParserConfig() reads unquotedCasing instead of hard-coding TO_UPPER - new ConverterProvider(Casing) and ConverterProvider(extensions, typeFactory, Casing) for the common cases; the existing 7-arg constructor gains Casing as an 8th parameter; all narrower constructors default to Casing.TO_UPPER Propagation through the pipeline — casing is applied consistently across both CREATE TABLE parsing and query parsing: - SubstraitSqlToCalcite: new convertQueries(sql, catalog, ConverterProvider, operatorTable) overload passes getSqlParserConfig() to the statement parser - SqlToSubstrait: convert(sql, catalog) uses the ConverterProvider overload; the legacy convert(sql, catalog, SqlDialect) overload is deprecated and now delegates to it (the SqlDialect argument is ignored — casing is controlled by the ConverterProvider) - SubstraitCreateStatementParser: new processCreateStatements(ConverterProvider, sql) and processCreateStatementsToCatalog(ConverterProvider, ...) overloads; SqlParser.Config stays an internal detail - SqlExpressionToSubstrait: uses processCreateStatements(converterProvider, tableDef) - IsthmusEntryPoint: uses new ConverterProvider(unquotedCasing); anonymous ConverterProvider subclass removed Examples: - FromSql: replaced the deprecated convert(sql, catalog, SqlDialect) call with a shared ConverterProvider(Casing.UNCHANGED) used for both schema and query parsing, preserving the lower-case identifiers as written BREAKING CHANGE: The SqlParser.Config-based parsing entry points have been removed in favour of ConverterProvider overloads: - SubstraitSqlStatementParser.parseStatements(String, SqlParser.Config) - SubstraitSqlToCalcite.convertQueries(String, CatalogReader, SqlParser.Config) - SubstraitSqlToCalcite.convertQueries(String, CatalogReader, SqlValidator, RelOptCluster, SqlParser.Config) Callers should pass a ConverterProvider (configured for the desired casing, or subclassed with an overridden getSqlParserConfig() for fully custom parser settings) instead of a SqlParser.Config. --- .../java/io/substrait/examples/FromSql.java | 23 ++-- .../isthmus/cli/IsthmusEntryPoint.java | 17 +-- .../substrait/isthmus/ConverterProvider.java | 93 ++++++++++++- .../isthmus/SqlExpressionToSubstrait.java | 4 +- .../io/substrait/isthmus/SqlToSubstrait.java | 34 +++-- .../io/substrait/isthmus/SubstraitToSql.java | 2 +- .../sql/SubstraitCreateStatementParser.java | 123 +++++++++++++----- .../sql/SubstraitSqlStatementParser.java | 29 ++--- .../isthmus/sql/SubstraitSqlToCalcite.java | 71 ++++++---- .../isthmus/DdlToSubstraitConversionTest.java | 6 +- ...bstraitConversionWithOptimizationTest.java | 11 +- .../substrait/isthmus/UnquotedCasingTest.java | 76 +++++++++++ 12 files changed, 356 insertions(+), 133 deletions(-) create mode 100644 isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java diff --git a/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java b/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java index 17973b683..a9cf16fd9 100644 --- a/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java +++ b/examples/isthmus-api/src/main/java/io/substrait/examples/FromSql.java @@ -1,6 +1,7 @@ package io.substrait.examples; import io.substrait.examples.IsthmusAppExamples.Action; +import io.substrait.isthmus.ConverterProvider; import io.substrait.isthmus.SqlToSubstrait; import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import io.substrait.plan.Plan; @@ -10,8 +11,8 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.prepare.CalciteCatalogReader; -import org.apache.calcite.sql.SqlDialect; import org.apache.calcite.sql.parser.SqlParseException; /** @@ -22,7 +23,7 @@ *

1. Create a fully typed schema for the inputs. Within a SQL context this represents the CREATE * TABLE commands, which need to be converted to a Calcite Schema. * - *

2. Parse the SQL query to convert (in the source SQL dialect). + *

2. Parse the SQL query to convert. * *

3. Convert the SQL query to Calcite Relations. * @@ -49,22 +50,26 @@ public void run(final String[] args) { "test_result" varchar(15),"test_mileage" int, "postcode_area" varchar(15)); """); + // The unquoted identifier casing applied while parsing is configurable via a + // ConverterProvider. The same provider is used for both the schema and the query so that + // identifier casing stays consistent end-to-end. Casing.UNCHANGED preserves identifiers as + // written, matching the lower-case names used in the CREATE TABLE statements above. + final ConverterProvider converterProvider = new ConverterProvider(Casing.UNCHANGED); + final CalciteCatalogReader catalogReader = - SubstraitCreateStatementParser.processCreateStatementsToCatalog(createSqlStatements); + SubstraitCreateStatementParser.processCreateStatementsToCatalog( + converterProvider, createSqlStatements); - // Query that needs to be converted; again this could be in a variety of SQL - // dialects + // Query that needs to be converted final String sqlQuery = """ SELECT vehicles.colour, count(*) as colourcount FROM vehicles INNER JOIN tests ON vehicles.vehicle_id=tests.vehicle_id WHERE tests.test_result = 'P' GROUP BY vehicles.colour ORDER BY count(*) """; - final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(); - // choose DuckDB as an example dialect - final SqlDialect dialect = SqlDialect.DatabaseProduct.DUCKDB.getDialect(); - final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader, dialect); + final SqlToSubstrait sqlToSubstrait = new SqlToSubstrait(converterProvider); + final Plan substraitPlan = sqlToSubstrait.convert(sqlQuery, catalogReader); // Create the proto plan to display to stdout - as it has a better format final PlanProtoConverter planToProto = new PlanProtoConverter(); diff --git a/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java b/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java index 03d4fef2b..39566ebf3 100644 --- a/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java +++ b/isthmus-cli/src/main/java/io/substrait/isthmus/cli/IsthmusEntryPoint.java @@ -15,7 +15,6 @@ import java.util.concurrent.Callable; import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.sql.parser.SqlParser; import picocli.CommandLine; import picocli.CommandLine.Command; import picocli.CommandLine.Option; @@ -60,15 +59,6 @@ enum OutputFormat { description = "Calcite's casing policy for unquoted identifiers: ${COMPLETION-CANDIDATES}") private Casing unquotedCasing = Casing.TO_UPPER; - private ConverterProvider converterProvider() { - return new ConverterProvider() { - @Override - public SqlParser.Config getSqlParserConfig() { - return super.getSqlParserConfig().withUnquotedCasing(unquotedCasing); - } - }; - } - /** * Standard Java Main method invoked by the isthmus CLI command. * @@ -96,16 +86,17 @@ public static void main(String... args) { @Override public Integer call() throws Exception { + ConverterProvider provider = new ConverterProvider(unquotedCasing); // Isthmus image is parsing SQL Expression if that argument is defined if (sqlExpressions != null) { - SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(converterProvider()); + SqlExpressionToSubstrait converter = new SqlExpressionToSubstrait(provider); ExtendedExpression extendedExpression = converter.convert(sqlExpressions, createStatements); printMessage(extendedExpression); } else { // by default Isthmus image are parsing SQL Query - SqlToSubstrait converter = new SqlToSubstrait(converterProvider()); + SqlToSubstrait converter = new SqlToSubstrait(provider); Prepare.CatalogReader catalog = SubstraitCreateStatementParser.processCreateStatementsToCatalog( - createStatements.toArray(String[]::new)); + provider, createStatements); Plan plan = new PlanProtoConverter().toProto(converter.convert(sql, catalog)); printMessage(plan); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java b/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java index 72e77586d..a9836b7bf 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java +++ b/isthmus/src/main/java/io/substrait/isthmus/ConverterProvider.java @@ -49,12 +49,24 @@ */ public class ConverterProvider { + /** + * A shared default {@link ConverterProvider} instance using all system defaults. Equivalent to + * {@code new ConverterProvider()} but avoids redundant construction at every call site. + * + *

This instance is safe to share because {@link ConverterProvider} is effectively immutable + * after construction — all fields are set only in constructors. + */ + public static final ConverterProvider DEFAULT = new ConverterProvider(); + /** The Calcite type factory used for creating and managing data types. */ protected RelDataTypeFactory typeFactory; /** The collection of Substrait extensions (functions and types) available for conversion. */ protected final SimpleExtension.ExtensionCollection extensions; + /** The casing applied to unquoted SQL identifiers during parsing. */ + protected final Casing unquotedCasing; + /** Converter for Substrait scalar functions. */ protected ScalarFunctionConverter scalarFunctionConverter; @@ -78,6 +90,21 @@ public ConverterProvider() { this(DefaultExtensionCatalog.DEFAULT_COLLECTION, SubstraitTypeSystem.TYPE_FACTORY); } + /** + * Creates a ConverterProvider with the specified unquoted identifier casing. + * + *

Uses {@link DefaultExtensionCatalog#DEFAULT_COLLECTION} and {@link + * SubstraitTypeSystem#TYPE_FACTORY}. + * + * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing + */ + public ConverterProvider(Casing unquotedCasing) { + this( + DefaultExtensionCatalog.DEFAULT_COLLECTION, + SubstraitTypeSystem.TYPE_FACTORY, + unquotedCasing); + } + /** * Creates a ConverterProvider with the specified extension collection and default type factory. * @@ -95,13 +122,30 @@ public ConverterProvider(SimpleExtension.ExtensionCollection extensions) { */ public ConverterProvider( SimpleExtension.ExtensionCollection extensions, RelDataTypeFactory typeFactory) { + this(extensions, typeFactory, Casing.TO_UPPER); + } + + /** + * Creates a ConverterProvider with the specified extension collection, type factory, and unquoted + * identifier casing. + * + * @param extensions the Substrait extension collection to use + * @param typeFactory the Calcite type factory to use + * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing + */ + public ConverterProvider( + SimpleExtension.ExtensionCollection extensions, + RelDataTypeFactory typeFactory, + Casing unquotedCasing) { this( typeFactory, extensions, new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory), new AggregateFunctionConverter(extensions.aggregateFunctions(), typeFactory), new WindowFunctionConverter(extensions.windowFunctions(), typeFactory), - TypeConverter.DEFAULT); + TypeConverter.DEFAULT, + createDefaultExecutionBehavior(), + unquotedCasing); } /** @@ -121,7 +165,15 @@ public ConverterProvider( AggregateFunctionConverter afc, WindowFunctionConverter wfc, TypeConverter tc) { - this(typeFactory, extensions, sfc, afc, wfc, tc, createDefaultExecutionBehavior()); + this( + typeFactory, + extensions, + sfc, + afc, + wfc, + tc, + createDefaultExecutionBehavior(), + Casing.TO_UPPER); } /** @@ -143,6 +195,31 @@ public ConverterProvider( WindowFunctionConverter wfc, TypeConverter tc, Plan.ExecutionBehavior executionBehavior) { + this(typeFactory, extensions, sfc, afc, wfc, tc, executionBehavior, Casing.TO_UPPER); + } + + /** + * Creates a ConverterProvider with full customization including execution behavior and unquoted + * identifier casing. + * + * @param typeFactory the Calcite type factory to use + * @param extensions the Substrait extension collection to use + * @param sfc the scalar function converter to use + * @param afc the aggregate function converter to use + * @param wfc the window function converter to use + * @param tc the type converter to use + * @param executionBehavior the execution behavior to use for plans + * @param unquotedCasing the casing to apply to unquoted SQL identifiers during parsing + */ + public ConverterProvider( + RelDataTypeFactory typeFactory, + SimpleExtension.ExtensionCollection extensions, + ScalarFunctionConverter sfc, + AggregateFunctionConverter afc, + WindowFunctionConverter wfc, + TypeConverter tc, + Plan.ExecutionBehavior executionBehavior, + Casing unquotedCasing) { this.typeFactory = typeFactory; this.extensions = extensions; this.scalarFunctionConverter = sfc; @@ -150,6 +227,7 @@ public ConverterProvider( this.windowFunctionConverter = wfc; this.typeConverter = tc; this.executionBehavior = executionBehavior; + this.unquotedCasing = unquotedCasing; } /** @@ -165,6 +243,15 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() { // SQL to Calcite Processing + /** + * Returns the casing applied to unquoted SQL identifiers during parsing. + * + * @return the unquoted identifier casing + */ + public Casing getUnquotedCasing() { + return unquotedCasing; + } + /** * {@link SqlParser.Config} is a Calcite class which controls SQL parsing behaviour like * identifier casing. @@ -173,7 +260,7 @@ private static Plan.ExecutionBehavior createDefaultExecutionBehavior() { */ public SqlParser.Config getSqlParserConfig() { return SqlParser.Config.DEFAULT - .withUnquotedCasing(Casing.TO_UPPER) + .withUnquotedCasing(unquotedCasing) .withParserFactory(SqlDdlParserImpl.FACTORY) .withConformance(SqlConformanceEnum.LENIENT); } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java index 5c020d5d1..9637cd6fd 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlExpressionToSubstrait.java @@ -40,7 +40,7 @@ public class SqlExpressionToSubstrait extends SqlConverterBase { /** Creates a converter with default configuration. */ public SqlExpressionToSubstrait() { - this(new ConverterProvider()); + this(ConverterProvider.DEFAULT); } /** @@ -202,7 +202,7 @@ private Result registerCreateTablesForExtendedExpression(List tables) if (tables != null) { for (String tableDef : tables) { List tList = - SubstraitCreateStatementParser.processCreateStatements(tableDef); + SubstraitCreateStatementParser.processCreateStatements(converterProvider, tableDef); for (SubstraitTable t : tList) { rootSchema.add(t.getName(), t); for (RelDataTypeField field : t.getRowType(factory).getFieldList()) { diff --git a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java index 765dd8dfd..36a142931 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SqlToSubstrait.java @@ -5,11 +5,9 @@ import io.substrait.plan.Plan; import io.substrait.plan.Plan.Version; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.server.ServerDdlExecutor; import org.apache.calcite.sql.SqlDialect; import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; /** * Take a SQL statement and a set of table definitions and return a substrait plan. @@ -21,7 +19,7 @@ public class SqlToSubstrait extends SqlConverterBase { /** Creates a SQL-to-Substrait converter using the default configuration. */ public SqlToSubstrait() { - this(new ConverterProvider()); + this(ConverterProvider.DEFAULT); } /** @@ -50,7 +48,9 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata builder.executionBehavior(converterProvider.getExecutionBehavior()); // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, operatorTable).stream() + SubstraitSqlToCalcite.convertQueries( + sqlStatements, catalogReader, converterProvider, operatorTable) + .stream() .map(root -> SubstraitRelVisitor.convert(root, converterProvider)) .forEach(root -> builder.addRoots(root)); @@ -60,31 +60,29 @@ public Plan convert(final String sqlStatements, final Prepare.CatalogReader cata /** * Converts one or more SQL statements into a Substrait {@link Plan}. * + *

The {@code sqlDialect} parameter was previously used to influence identifier casing during + * parsing. This is now configurable directly via {@link + * ConverterProvider#ConverterProvider(org.apache.calcite.avatica.util.Casing)}, or for fully + * custom parser behaviour, by subclassing {@link ConverterProvider} and overriding {@link + * ConverterProvider#getSqlParserConfig()}. + * * @param sqlStatements a string containing one more SQL statements * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statements * @param sqlDialect The sql dialect to use for parsing. * @return the Substrait {@link Plan} * @throws SqlParseException if there is an error while parsing the SQL statements + * @deprecated Prefer constructing {@link SqlToSubstrait} with a {@link ConverterProvider} + * configured for the desired casing and calling {@link #convert(String, + * Prepare.CatalogReader)}. For fully custom parser behaviour, subclass {@link + * ConverterProvider} and override {@link ConverterProvider#getSqlParserConfig()}. */ + @Deprecated public Plan convert( final String sqlStatements, final Prepare.CatalogReader catalogReader, final SqlDialect sqlDialect) throws SqlParseException { - Builder builder = io.substrait.plan.Plan.builder(); - builder.version(Version.builder().from(Version.DEFAULT_VERSION).producer("isthmus").build()); - builder.executionBehavior(converterProvider.getExecutionBehavior()); - - final SqlParser.Config sqlParserConfig = - sqlDialect.configureParser( - SqlParser.config().withParserFactory(ServerDdlExecutor.PARSER_FACTORY)); - - // TODO: consider case in which one sql passes conversion while others don't - SubstraitSqlToCalcite.convertQueries(sqlStatements, catalogReader, sqlParserConfig).stream() - .map(root -> SubstraitRelVisitor.convert(root, converterProvider)) - .forEach(root -> builder.addRoots(root)); - - return builder.build(); + return convert(sqlStatements, catalogReader); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java index 690bd5774..ac07d8d65 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java +++ b/isthmus/src/main/java/io/substrait/isthmus/SubstraitToSql.java @@ -23,7 +23,7 @@ public class SubstraitToSql extends SqlConverterBase { /** Creates a SubstraitToSql converter with default configuration and extensions. */ public SubstraitToSql() { - this(new ConverterProvider()); + this(ConverterProvider.DEFAULT); } /** diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java index b008d577d..f8179e397 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitCreateStatementParser.java @@ -1,5 +1,6 @@ package io.substrait.isthmus.sql; +import io.substrait.isthmus.ConverterProvider; import io.substrait.isthmus.SqlConverterBase; import io.substrait.isthmus.SubstraitTypeSystem; import io.substrait.isthmus.Utils; @@ -50,28 +51,7 @@ public class SubstraitCreateStatementParser { */ public static List processCreateStatements(@NonNull final String createStatements) throws SqlParseException { - final List tableList = new ArrayList<>(); - - final List sqlNode = SubstraitSqlStatementParser.parseStatements(createStatements); - for (final SqlNode parsed : sqlNode) { - if (!(parsed instanceof SqlCreateTable)) { - throw fail("Not a valid CREATE TABLE statement."); - } - - final SqlCreateTable create = (SqlCreateTable) parsed; - - if (create.name.names.size() > 1) { - throw fail("Only simple table names are allowed.", create.name.getParserPosition()); - } - - if (create.query != null) { - throw fail("CTAS not supported.", create.name.getParserPosition()); - } - - tableList.add(createSubstraitTable(create.name.names.get(0), create.columnList)); - } - - return tableList; + return processCreateStatements(ConverterProvider.DEFAULT, createStatements); } /** @@ -89,6 +69,26 @@ public static CalciteCatalogReader processCreateStatementsToCatalog( return processCreateStatementsToCatalog(createStatements.toArray(new String[0])); } + /** + * Parses one or more SQL strings containing only CREATE statements into a {@link + * CalciteCatalogReader}, using the parser settings from the given {@link ConverterProvider}. + * + *

This method expects the use of fully qualified table names in the CREATE statements. + * + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings + * @param createStatements List of SQL strings containing only CREATE statements, must not be null + * @return a {@link CalciteCatalogReader} generated from the CREATE statements + * @throws SqlParseException if there is an error parsing the SQL statements + */ + public static CalciteCatalogReader processCreateStatementsToCatalog( + @NonNull final ConverterProvider converterProvider, + @NonNull final List createStatements) + throws SqlParseException { + return processCreateStatementsToCatalog( + converterProvider, createStatements.toArray(new String[0])); + } + /** * Parses one or more SQL strings containing only CREATE statements into a {@link * CalciteCatalogReader} @@ -101,7 +101,33 @@ public static CalciteCatalogReader processCreateStatementsToCatalog( */ public static CalciteCatalogReader processCreateStatementsToCatalog( @NonNull final String... createStatements) throws SqlParseException { - final CalciteSchema rootSchema = processCreateStatementsToSchema(createStatements); + final CalciteSchema rootSchema = + processCreateStatementsToSchema(ConverterProvider.DEFAULT, createStatements); + final List defaultSchema = Collections.emptyList(); + return new CalciteCatalogReader( + rootSchema, + defaultSchema, + SubstraitTypeSystem.TYPE_FACTORY, + SqlConverterBase.CONNECTION_CONFIG); + } + + /** + * Parses one or more SQL strings containing only CREATE statements into a {@link + * CalciteCatalogReader}, using the parser settings from the given {@link ConverterProvider}. + * + *

This method expects the use of fully qualified table names in the CREATE statements. + * + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings + * @param createStatements a SQL string containing only CREATE statements, must not be null + * @return a {@link CalciteCatalogReader} generated from the CREATE statements + * @throws SqlParseException if parsing fails or statements are invalid + */ + public static CalciteCatalogReader processCreateStatementsToCatalog( + @NonNull final ConverterProvider converterProvider, @NonNull final String... createStatements) + throws SqlParseException { + final CalciteSchema rootSchema = + processCreateStatementsToSchema(converterProvider, createStatements); final List defaultSchema = Collections.emptyList(); return new CalciteCatalogReader( rootSchema, @@ -133,19 +159,58 @@ private static SqlParseException fail(@Nullable final String message) { } /** - * Parses one or more SQL strings containing only CREATE statements into a {@link CalciteSchema}. + * Parses a SQL string containing only CREATE statements into a list of {@link SubstraitTable}s, + * using the parser settings from the given {@link ConverterProvider}. + * + *

This method only supports simple table names without any additional qualifiers. Only used + * with {@link io.substrait.isthmus.SqlExpressionToSubstrait}. * - * @param createStatements one or more SQL strings containing only CREATE statements; must not be - * null - * @return a {@link CalciteSchema} generated from the CREATE statements + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings + * @param createStatements a SQL string containing only CREATE statements; must not be null + * @return list of {@link SubstraitTable}s generated from the CREATE statements * @throws SqlParseException if parsing fails or statements are invalid */ + public static List processCreateStatements( + @NonNull final ConverterProvider converterProvider, @NonNull final String createStatements) + throws SqlParseException { + final List tableList = new ArrayList<>(); + + final List sqlNode = + SubstraitSqlStatementParser.parseStatements(createStatements, converterProvider); + for (final SqlNode parsed : sqlNode) { + if (!(parsed instanceof SqlCreateTable)) { + throw fail("Not a valid CREATE TABLE statement."); + } + + final SqlCreateTable create = (SqlCreateTable) parsed; + + if (create.name.names.size() > 1) { + throw fail("Only simple table names are allowed.", create.name.getParserPosition()); + } + + if (create.query != null) { + throw fail("CTAS not supported.", create.name.getParserPosition()); + } + + tableList.add(createSubstraitTable(create.name.names.get(0), create.columnList)); + } + + return tableList; + } + + /** + * Parses one or more SQL strings containing only CREATE statements into a {@link CalciteSchema} + * using the given provider's parser config. + */ private static CalciteSchema processCreateStatementsToSchema( - @NonNull final String... createStatements) throws SqlParseException { + @NonNull final ConverterProvider converterProvider, @NonNull final String... createStatements) + throws SqlParseException { final CalciteSchema rootSchema = CalciteSchema.createRootSchema(false); for (final String statement : createStatements) { - final List sqlNode = SubstraitSqlStatementParser.parseStatements(statement); + final List sqlNode = + SubstraitSqlStatementParser.parseStatements(statement, converterProvider); for (final SqlNode parsed : sqlNode) { if (!(parsed instanceof SqlCreateTable)) { throw fail("Not a valid CREATE TABLE statement."); diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java index 8da220c84..c6ddb719c 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlStatementParser.java @@ -1,12 +1,10 @@ package io.substrait.isthmus.sql; +import io.substrait.isthmus.ConverterProvider; import java.util.List; -import org.apache.calcite.avatica.util.Casing; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; -import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl; -import org.apache.calcite.sql.validate.SqlConformanceEnum; /** * Utility class for parsing SQL statements to {@link SqlNode}s using a Substrait flavoured SQL @@ -14,14 +12,6 @@ */ public class SubstraitSqlStatementParser { - private static final SqlParser.Config PARSER_CONFIG = - SqlParser.config() - // TODO: switch to Casing.UNCHANGED - .withUnquotedCasing(Casing.TO_UPPER) - // use LENIENT conformance to allow for parsing a wide variety of dialects - .withConformance(SqlConformanceEnum.LENIENT) - .withParserFactory(SqlDdlParserImpl.FACTORY); - /** * Parse one or more SQL statements to a list of {@link SqlNode}s. * @@ -30,20 +20,25 @@ public class SubstraitSqlStatementParser { * @throws SqlParseException if there is an error while parsing the SQL statements */ public static List parseStatements(String sqlStatements) throws SqlParseException { - return parseStatements(sqlStatements, PARSER_CONFIG); + return parseStatements(sqlStatements, ConverterProvider.DEFAULT); } /** - * Parse one or more SQL statements to a list of {@link SqlNode}s. + * Parse one or more SQL statements to a list of {@link SqlNode}s, using the parser settings from + * the given {@link ConverterProvider}. + * + *

To use a fully custom parser configuration, subclass {@link ConverterProvider} and override + * {@link ConverterProvider#getSqlParserConfig()}. * * @param sqlStatements a string containing one or more SQL statements - * @param parserConfig Calcite SqlParser.Config to control the parser + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings * @return a list of {@link SqlNode}s corresponding to the given statements * @throws SqlParseException if there is an error while parsing the SQL statements */ - public static List parseStatements(String sqlStatements, SqlParser.Config parserConfig) - throws SqlParseException { - SqlParser parser = SqlParser.create(sqlStatements, parserConfig); + public static List parseStatements( + String sqlStatements, ConverterProvider converterProvider) throws SqlParseException { + SqlParser parser = SqlParser.create(sqlStatements, converterProvider.getSqlParserConfig()); return parser.parseStmtList(); } } diff --git a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java index d2ec84111..7855f5263 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java +++ b/isthmus/src/main/java/io/substrait/isthmus/sql/SubstraitSqlToCalcite.java @@ -1,5 +1,6 @@ package io.substrait.isthmus.sql; +import io.substrait.isthmus.ConverterProvider; import io.substrait.isthmus.SubstraitTypeSystem; import io.substrait.isthmus.calcite.rel.DdlSqlToRelConverter; import java.util.List; @@ -17,7 +18,6 @@ import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperatorTable; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.validate.SqlValidator; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.sql2rel.StandardConvertletTable; @@ -81,7 +81,18 @@ public static RelRoot convertQuery( SqlValidator validator, RelOptCluster cluster) throws SqlParseException { - List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatement); + return convertQuery(sqlStatement, catalogReader, validator, cluster, ConverterProvider.DEFAULT); + } + + static RelRoot convertQuery( + String sqlStatement, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + RelOptCluster cluster, + ConverterProvider converterProvider) + throws SqlParseException { + List sqlNodes = + SubstraitSqlStatementParser.parseStatements(sqlStatement, converterProvider); if (sqlNodes.size() != 1) { throw new IllegalArgumentException( String.format("Expected one statement, found: %d", sqlNodes.size())); @@ -111,39 +122,48 @@ public static List convertQueries( /** * Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per - * statement. + * statement, using the parser configuration from the given {@link ConverterProvider}. * * @param sqlStatements a string containing one or more SQL statements * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statements + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings + * @param operatorTable the {@link SqlOperatorTable} for controlling valid operators * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ public static List convertQueries( - String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { - SqlValidator validator = new SubstraitSqlValidator(catalogReader); - return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster()); + String sqlStatements, + Prepare.CatalogReader catalogReader, + ConverterProvider converterProvider, + SqlOperatorTable operatorTable) + throws SqlParseException { + SqlValidator validator = new SubstraitSqlValidator(catalogReader, operatorTable); + return convertQueries( + sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), converterProvider); } /** * Converts one or more SQL statements to a List of {@link RelRoot}, with one {@link RelRoot} per - * statement. + * statement, using the parser configuration from the given {@link ConverterProvider}. * * @param sqlStatements a string containing one or more SQL statements * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statements - * @param parserConfig Calcite Parser config to use with the given SQL Statements + * @param converterProvider the converter provider whose parser config controls identifier casing + * and other parser settings * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ public static List convertQueries( String sqlStatements, Prepare.CatalogReader catalogReader, - final SqlParser.Config parserConfig) + ConverterProvider converterProvider) throws SqlParseException { SqlValidator validator = new SubstraitSqlValidator(catalogReader); return convertQueries( - sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), parserConfig); + sqlStatements, catalogReader, validator, createDefaultRelOptCluster(), converterProvider); } /** @@ -153,23 +173,13 @@ public static List convertQueries( * @param sqlStatements a string containing one or more SQL statements * @param catalogReader the {@link Prepare.CatalogReader} for finding tables/views referenced in * the SQL statements - * @param validator the {@link SqlValidator} used to validate SQL statements. Allows for - * additional control of SQL functions and operators via {@link - * SqlValidator#getOperatorTable()} - * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement - * processing. Calcite expects that the {@link RelOptCluster} used during statement processing - * is the same as that used during query optimization. * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ public static List convertQueries( - String sqlStatements, - Prepare.CatalogReader catalogReader, - SqlValidator validator, - RelOptCluster cluster) - throws SqlParseException { - List sqlNodes = SubstraitSqlStatementParser.parseStatements(sqlStatements); - return convert(sqlNodes, catalogReader, validator, cluster); + String sqlStatements, Prepare.CatalogReader catalogReader) throws SqlParseException { + SqlValidator validator = new SubstraitSqlValidator(catalogReader); + return convertQueries(sqlStatements, catalogReader, validator, createDefaultRelOptCluster()); } /** @@ -185,19 +195,28 @@ public static List convertQueries( * @param cluster the {@link RelOptCluster} used when creating {@link RelNode}s during statement * processing. Calcite expects that the {@link RelOptCluster} used during statement processing * is the same as that used during query optimization. - * @param parserConfig Calcite Parser config to use with the given SQL Statements * @return a list of {@link RelRoot}s corresponding to the given SQL statements * @throws SqlParseException if there is an error while parsing the SQL statements */ public static List convertQueries( + String sqlStatements, + Prepare.CatalogReader catalogReader, + SqlValidator validator, + RelOptCluster cluster) + throws SqlParseException { + return convertQueries( + sqlStatements, catalogReader, validator, cluster, ConverterProvider.DEFAULT); + } + + static List convertQueries( String sqlStatements, Prepare.CatalogReader catalogReader, SqlValidator validator, RelOptCluster cluster, - SqlParser.Config parserConfig) + ConverterProvider converterProvider) throws SqlParseException { List sqlNodes = - SubstraitSqlStatementParser.parseStatements(sqlStatements, parserConfig); + SubstraitSqlStatementParser.parseStatements(sqlStatements, converterProvider); return convert(sqlNodes, catalogReader, validator, cluster); } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java index fbd6a4163..760a2fc03 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionTest.java @@ -2,7 +2,6 @@ import io.substrait.isthmus.sql.SubstraitCreateStatementParser; import org.apache.calcite.prepare.Prepare; -import org.apache.calcite.sql.SqlDialect; import org.apache.calcite.sql.parser.SqlParseException; import org.junit.jupiter.api.Test; @@ -14,9 +13,6 @@ void testConversion() throws SqlParseException { "create table src1 (intcol int, charcol varchar(10))"); final SqlToSubstrait converter = new SqlToSubstrait(); - converter.convert( - "create table dst1 as select * from src1", - catalogReader, - SqlDialect.DatabaseProduct.CALCITE.getDialect()); + converter.convert("create table dst1 as select * from src1", catalogReader); } } diff --git a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java index 68763615c..ef68860d6 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/DdlToSubstraitConversionWithOptimizationTest.java @@ -19,10 +19,7 @@ import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.rules.CoreRules; -import org.apache.calcite.server.ServerDdlExecutor; -import org.apache.calcite.sql.SqlDialect; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.calcite.sql.parser.SqlParser; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -36,12 +33,6 @@ */ class DdlToSubstraitConversionWithOptimizationTest { - /** Parser configuration for DDL statements. */ - private static final SqlParser.Config DDL_PARSER_CONFIG = - SqlDialect.DatabaseProduct.CALCITE - .getDialect() - .configureParser(SqlParser.config().withParserFactory(ServerDdlExecutor.PARSER_FACTORY)); - private final ConverterProvider converterProvider = new ConverterProvider(); /** @@ -68,7 +59,7 @@ void testDdlOptimizationMutation(String ddlType) throws SqlParseException { // Convert DDL to RelRoot List relRoots = - SubstraitSqlToCalcite.convertQueries(createStatement, catalogReader, DDL_PARSER_CONFIG); + SubstraitSqlToCalcite.convertQueries(createStatement, catalogReader, converterProvider); RelRoot relRoot = relRoots.get(0); // Apply an optimization rule (FILTER_INTO_JOIN) that will structurally mutate the children diff --git a/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java b/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java new file mode 100644 index 000000000..75affb69d --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/UnquotedCasingTest.java @@ -0,0 +1,76 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.substrait.isthmus.sql.SubstraitCreateStatementParser; +import io.substrait.plan.Plan; +import io.substrait.relation.NamedScan; +import io.substrait.relation.Project; +import org.apache.calcite.avatica.util.Casing; +import org.apache.calcite.prepare.Prepare; +import org.junit.jupiter.api.Test; + +/** + * Verifies that the {@link ConverterProvider} unquoted casing parameter controls identifier casing + * consistently across both CREATE TABLE parsing and query parsing, so that the table name stored in + * a Substrait {@link NamedScan} reflects the configured casing. + */ +class UnquotedCasingTest { + + private static final String CREATE_STATEMENT = "CREATE TABLE employees (id BIGINT, name VARCHAR)"; + + @Test + void defaultCasingIsToUpper() { + ConverterProvider provider = new ConverterProvider(); + assertEquals(Casing.TO_UPPER, provider.getSqlParserConfig().unquotedCasing()); + assertEquals(Casing.TO_UPPER, provider.getUnquotedCasing()); + } + + @Test + void constructorCasingUnchanged() { + ConverterProvider provider = new ConverterProvider(Casing.UNCHANGED); + assertEquals(Casing.UNCHANGED, provider.getSqlParserConfig().unquotedCasing()); + assertEquals(Casing.UNCHANGED, provider.getUnquotedCasing()); + } + + @Test + void constructorCasingToLower() { + ConverterProvider provider = new ConverterProvider(Casing.TO_LOWER); + assertEquals(Casing.TO_LOWER, provider.getSqlParserConfig().unquotedCasing()); + assertEquals(Casing.TO_LOWER, provider.getUnquotedCasing()); + } + + /** + * With the default {@link Casing#TO_UPPER}, both CREATE TABLE and SELECT fold unquoted + * identifiers to upper-case. The resulting {@link NamedScan} table name is {@code EMPLOYEES}. + */ + @Test + void defaultCasingFoldsTableNameToUpper() throws Exception { + ConverterProvider provider = new ConverterProvider(); + Prepare.CatalogReader catalog = + SubstraitCreateStatementParser.processCreateStatementsToCatalog(provider, CREATE_STATEMENT); + + Plan plan = new SqlToSubstrait(provider).convert("SELECT id FROM employees", catalog); + + NamedScan scan = (NamedScan) ((Project) plan.getRoots().get(0).getInput()).getInput(); + assertEquals("EMPLOYEES", scan.getNames().get(0)); + } + + /** + * With {@link Casing#UNCHANGED}, both CREATE TABLE and SELECT preserve the written casing of + * unquoted identifiers. A query using lowercase {@code employees} against a catalog built from + * {@code CREATE TABLE employees} therefore produces a {@link NamedScan} with name {@code + * employees}. + */ + @Test + void unchangedCasingPreservesTableName() throws Exception { + ConverterProvider provider = new ConverterProvider(Casing.UNCHANGED); + Prepare.CatalogReader catalog = + SubstraitCreateStatementParser.processCreateStatementsToCatalog(provider, CREATE_STATEMENT); + + Plan plan = new SqlToSubstrait(provider).convert("SELECT id FROM employees", catalog); + + NamedScan scan = (NamedScan) ((Project) plan.getRoots().get(0).getInput()).getInput(); + assertEquals("employees", scan.getNames().get(0)); + } +}