Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ public class FunctionMappings {
s(SqlLibraryOperators.STARTS_WITH, "starts_with"),
s(SqlLibraryOperators.ENDS_WITH, "ends_with"),
s(SqlLibraryOperators.CONTAINS_SUBSTR, "contains"),
// Two-argument REGEXP_EXTRACT(value, regexp) maps to the two-argument
// regexp_match_substring(input, pattern), which returns the substring matching the
// full pattern. Patterns containing a capture group are not handled specially here:
// the full match is returned rather than the captured group.
s(SqlLibraryOperators.REGEXP_EXTRACT, "regexp_match_substring"),
s(SqlStdOperatorTable.POSITION, "strpos"),
s(SqlLibraryOperators.LEFT, "left"),
s(SqlLibraryOperators.RIGHT, "right"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
* Roundtrip test for the AutomaticDynamicFunctionMappingConverterProvider feature.
*
* <p>This test verifies that: 1. Substrait plans using unmapped functions (like strftime or
* regexp_match_substring from extensions) are successfully converted. 2. With
* regexp_count_substring from extensions) are successfully converted. 2. With
* AutomaticDynamicFunctionMappingConverterProvider enabled, these unmapped functions are
* dynamically mapped to Calcite operators. 3. The roundtrip conversion (Substrait → Calcite →
* Substrait) is stable, including for SQL queries.
*
* <p>The test uses unmapped functions like strftime and regexp_match_substring that are defined in
* <p>The test uses unmapped functions like strftime and regexp_count_substring that are defined in
* extension YAML but not in FunctionMappings.
*/
class AutomaticDynamicFunctionMappingRoundtripTest extends PlanTestBase {
Expand Down Expand Up @@ -95,15 +95,15 @@ void testUnmappedStrftimeSqlRoundtrip() throws Exception {
* Test roundtrip with SQL query using multiple unmapped functions.
*
* <p>This test verifies that SQL queries with multiple unmapped function calls (like
* regexp_match_substring) can be handled when AutomaticDynamicFunctionMappingConverterProvider is
* regexp_count_substring) can be handled when AutomaticDynamicFunctionMappingConverterProvider is
* used. The operator table is populated with unmapped function signatures, allowing occurrences
* of unmapped functions to be recognized during SQL parsing and conversion.
*/
@Test
void testMultipleUnmappedFunctionsSqlRoundtrip() throws Exception {
String createStatements = "CREATE TABLE t (date_str VARCHAR, ts_str VARCHAR)";
String query =
"SELECT regexp_match_substring(date_str, '^[0-9]{4}') AS parsed_date, regexp_match_substring(ts_str, '^[0-9]{4}') AS parsed_ts FROM t";
"SELECT regexp_count_substring(date_str, '[0-9]') AS date_digits, regexp_count_substring(ts_str, '[0-9]') AS ts_digits FROM t";

// Perform roundtrip with multiple unmapped function calls
assertSqlSubstraitRelRoundTripLoosePojoComparison(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,13 @@ void testContainsWithLiteral(String left, String right) throws Exception {
assertSqlRoundTrip(query);
}

/**
 * Checks that a two-argument {@code REGEXP_EXTRACT} query round-trips for each parameterized
 * column of the {@code strings} table.
 *
 * <p>The pattern {@code [0-9]+} contains no capture group, so this only exercises the
 * full-match form of the function.
 *
 * @param column name of the column passed as the first argument to {@code REGEXP_EXTRACT}
 * @throws Exception if the round-trip assertion fails or the query cannot be processed
 */
@ParameterizedTest
@ValueSource(strings = {"c16", "vc32", "vc"})
void testRegexpExtract(String column) throws Exception {
  // Build the statement inline; the column name is the only varying part.
  assertSqlRoundTrip("SELECT REGEXP_EXTRACT(" + column + ", '[0-9]+') FROM strings");
}

@ParameterizedTest
@CsvSource({"c16, c16", "c16, vc32", "c16, vc", "vc32, vc32", "vc32, vc", "vc, vc"})
void testPosition(String substring, String input) throws Exception {
Expand Down
Loading