diff --git a/native/core/src/execution/jni_api.rs b/native/core/src/execution/jni_api.rs index 6dc00e9cf6..d345c33f51 100644 --- a/native/core/src/execution/jni_api.rs +++ b/native/core/src/execution/jni_api.rs @@ -618,6 +618,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) { session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSec::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkRint::default())); session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitShift::right_unsigned())); + session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSoundex::default())); } /// Prepares arrow arrays for output. @@ -1191,6 +1192,7 @@ pub extern "system" fn Java_org_apache_comet_Native_getRustThreadId( use crate::execution::columnar_to_row::ColumnarToRowContext; use arrow::ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema}; use datafusion_spark::function::math::bin::SparkBin; +use datafusion_spark::function::string::soundex::SparkSoundex; /// Initialize a native columnar to row converter. /// diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala index 3186818c9c..00b339a195 100644 --- a/spark/src/main/scala/org/apache/comet/serde/strings.scala +++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala @@ -730,7 +730,7 @@ object CometFormatString extends CometCodegenDispatch[FormatString] object CometOverlay extends CometCodegenDispatch[Overlay] -object CometSoundEx extends CometCodegenDispatch[SoundEx] +object CometSoundEx extends CometScalarFunction[SoundEx]("soundex") object CometStringLocate extends CometCodegenDispatch[StringLocate] diff --git a/spark/src/test/resources/sql-tests/expressions/string/soundex.sql b/spark/src/test/resources/sql-tests/expressions/string/soundex.sql index b1892de0ee..3d94980277 100644 --- a/spark/src/test/resources/sql-tests/expressions/string/soundex.sql +++ b/spark/src/test/resources/sql-tests/expressions/string/soundex.sql @@ -21,7 +21,41 @@ statement CREATE TABLE test_soundex(s string) USING parquet statement -INSERT INTO test_soundex VALUES ('Miller'), ('Robert'), ('Rupert'), (''), (NULL) +INSERT INTO test_soundex VALUES + ('Miller'), + (NULL), + (''), + ('Apache Spark'), + ('123'), + ('a123'), + ('Datafusion'), + ('Ashcroft'), + ('B1B'), + ('B B'), + ('BAB'), + ('#hello'), + (' hello'), + ('\thello'), + ('😀hello'), + ('1abc'), + ('A'), + ('BFPV'), + ('Robert'), + ('Rupert'), + ('robert'), + ('rObErT'), + ('Müller'), + ('Abcdefghijklmnop'), + ('Lloyd'), + ('BWB'), + ('BHB'), + ('Tymczak'), + ('Aeiou'), + ('1Robert'), + ('Smith-Jones'), + ('#'), + ('\nhello'), + (' '); query SELECT s, soundex(s) FROM test_soundex @@ -29,3 +63,7 @@ SELECT s, soundex(s) FROM test_soundex -- literal arguments query SELECT soundex('Miller'), soundex('Tymczak') + +-- additional test for soundex with concat +query +SELECT concat(soundex(' '), 'Spark') as concat_spark;