diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java index 85f44a312..0837b1325 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java @@ -131,6 +131,11 @@ public class FunctionMappings { s(SqlLibraryOperators.STARTS_WITH, "starts_with"), s(SqlLibraryOperators.ENDS_WITH, "ends_with"), s(SqlLibraryOperators.CONTAINS_SUBSTR, "contains"), + // Two-argument REGEXP_EXTRACT(value, regexp) maps to the two-argument + // regexp_match_substring(input, pattern), which returns the substring matching the + // full pattern. Patterns containing a capture group are not handled specially here: + // the full match is returned rather than the captured group. + s(SqlLibraryOperators.REGEXP_EXTRACT, "regexp_match_substring"), s(SqlStdOperatorTable.POSITION, "strpos"), s(SqlLibraryOperators.LEFT, "left"), s(SqlLibraryOperators.RIGHT, "right"), diff --git a/isthmus/src/test/java/io/substrait/isthmus/AutomaticDynamicFunctionMappingRoundtripTest.java b/isthmus/src/test/java/io/substrait/isthmus/AutomaticDynamicFunctionMappingRoundtripTest.java index 03f0c8f0f..1b6a3f59f 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/AutomaticDynamicFunctionMappingRoundtripTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/AutomaticDynamicFunctionMappingRoundtripTest.java @@ -13,12 +13,12 @@ * Roundtrip test for the AutomaticDynamicFunctionMappingConverterProvider feature. * *
<p>This test verifies that: 1. Substrait plans using unmapped functions (like strftime or - * regexp_match_substring from extensions) are successfully converted. 2. With + * regexp_count_substring from extensions) are successfully converted. 2. With * AutomaticDynamicFunctionMappingConverterProvider enabled, these unmapped functions are * dynamically mapped to Calcite operators. 3. The roundtrip conversion (Substrait → Calcite → * Substrait) is stable, including for SQL queries. * - *
<p>The test uses unmapped functions like strftime and regexp_match_substring that are defined in + *
<p>The test uses unmapped functions like strftime and regexp_count_substring that are defined in * extension YAML but not in FunctionMappings. */ class AutomaticDynamicFunctionMappingRoundtripTest extends PlanTestBase { @@ -95,7 +95,7 @@ void testUnmappedStrftimeSqlRoundtrip() throws Exception { * Test roundtrip with SQL query using multiple unmapped functions. * *
<p>This test verifies that SQL queries with multiple unmapped function calls (like - * regexp_match_substring) can be handled when AutomaticDynamicFunctionMappingConverterProvider is + * regexp_count_substring) can be handled when AutomaticDynamicFunctionMappingConverterProvider is * used. The operator table is populated with unmapped function signatures, allowing occurrences * of unmapped functions to be recognized during SQL parsing and conversion. */ @@ -103,7 +103,7 @@ void testUnmappedStrftimeSqlRoundtrip() throws Exception { void testMultipleUnmappedFunctionsSqlRoundtrip() throws Exception { String createStatements = "CREATE TABLE t (date_str VARCHAR, ts_str VARCHAR)"; String query = - "SELECT regexp_match_substring(date_str, '^[0-9]{4}') AS parsed_date, regexp_match_substring(ts_str, '^[0-9]{4}') AS parsed_ts FROM t"; + "SELECT regexp_count_substring(date_str, '[0-9]') AS date_digits, regexp_count_substring(ts_str, '[0-9]') AS ts_digits FROM t"; // Perform roundtrip with multiple unmapped function calls assertSqlSubstraitRelRoundTripLoosePojoComparison( diff --git a/isthmus/src/test/java/io/substrait/isthmus/StringFunctionTest.java b/isthmus/src/test/java/io/substrait/isthmus/StringFunctionTest.java index 324b562de..eb6696ee6 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/StringFunctionTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/StringFunctionTest.java @@ -229,6 +229,13 @@ void testContainsWithLiteral(String left, String right) throws Exception { assertSqlRoundTrip(query); } + @ParameterizedTest + @ValueSource(strings = {"c16", "vc32", "vc"}) + void testRegexpExtract(String column) throws Exception { + String query = String.format("SELECT REGEXP_EXTRACT(%s, '[0-9]+') FROM strings", column); + assertSqlRoundTrip(query); + } + @ParameterizedTest @CsvSource({"c16, c16", "c16, vc32", "c16, vc", "vc32, vc32", "vc32, vc", "vc, vc"}) void testPosition(String substring, String input) throws Exception {